From ae74e78909ae0bc476112fb43b9580e969879dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Fri, 15 Oct 2021 12:05:32 +0200 Subject: Remove WebM support (and deps libvpx and opus) We've had many issues with WebM support and specifically the libvpx library over the years, mostly due to its poor integration in Godot's buildsystem, but without anyone really interested in improving this state. With the new GDExtensions in Godot 4.0, we intend to move video decoding to first-party extensions, and this would likely be done using something like libvlc to expose more codecs. Removing the `webm` module means we can remove libsimplewebm, libvpx and opus, which we were only used for that purpose. Both libvpx and opus were fairly complex pieces of the buildsystem, so this is a nice cleanup. This also removes the compile-time dependency on `yasm`. Fixes lots of compilation or non-working WebM issues which will be linked in the PR. --- COPYRIGHT.txt | 29 - SConstruct | 2 - doc/classes/VideoPlayer.xml | 4 +- modules/ogg/SCsub | 3 - modules/opus/SCsub | 252 - modules/opus/config.py | 6 - modules/opus/register_types.cpp | 37 - modules/opus/register_types.h | 37 - modules/theora/doc_classes/VideoStreamTheora.xml | 2 +- modules/theora/video_stream_theora.cpp | 2 +- modules/webm/SCsub | 48 - modules/webm/config.py | 19 - modules/webm/doc_classes/VideoStreamWebm.xml | 28 - modules/webm/libvpx/SCsub | 382 - modules/webm/register_types.cpp | 47 - modules/webm/register_types.h | 37 - modules/webm/video_stream_webm.cpp | 469 -- modules/webm/video_stream_webm.h | 135 - platform/linuxbsd/detect.py | 7 - platform/osx/detect.py | 2 - scene/gui/video_player.cpp | 4 +- thirdparty/README.md | 53 - thirdparty/libsimplewebm/LICENSE | 21 - thirdparty/libsimplewebm/OpusVorbisDecoder.cpp | 264 - thirdparty/libsimplewebm/OpusVorbisDecoder.hpp | 65 - thirdparty/libsimplewebm/VPXDecoder.cpp | 154 - thirdparty/libsimplewebm/VPXDecoder.hpp | 86 - thirdparty/libsimplewebm/WebMDemuxer.cpp | 241 - thirdparty/libsimplewebm/WebMDemuxer.hpp | 125 - thirdparty/libsimplewebm/libwebm/AUTHORS.TXT | 4 - thirdparty/libsimplewebm/libwebm/LICENSE.TXT | 30 - thirdparty/libsimplewebm/libwebm/PATENTS.TXT | 23 - thirdparty/libsimplewebm/libwebm/README.libvpx | 11 - thirdparty/libsimplewebm/libwebm/common/webmids.h | 192 - .../libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h | 28 - .../libsimplewebm/libwebm/mkvparser/mkvparser.cc | 8049 -------------------- .../libsimplewebm/libwebm/mkvparser/mkvparser.h | 1145 --- thirdparty/libvpx/AUTHORS | 142 - thirdparty/libvpx/CHANGELOG | 654 -- thirdparty/libvpx/LICENSE | 31 - thirdparty/libvpx/PATENTS | 23 - thirdparty/libvpx/rtcd/vp8_rtcd_arm.h | 240 - thirdparty/libvpx/rtcd/vp8_rtcd_c.h | 117 - thirdparty/libvpx/rtcd/vp8_rtcd_x86.h | 247 - thirdparty/libvpx/rtcd/vp9_rtcd_arm.h | 54 - thirdparty/libvpx/rtcd/vp9_rtcd_c.h | 41 - thirdparty/libvpx/rtcd/vp9_rtcd_x86.h | 55 - thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h | 678 -- thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h | 355 - thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h | 604 -- .../libvpx/third_party/android/cpu-features.c | 1313 ---- .../libvpx/third_party/android/cpu-features.h | 323 - thirdparty/libvpx/third_party/x86inc/LICENSE | 18 - thirdparty/libvpx/third_party/x86inc/README.libvpx | 20 - thirdparty/libvpx/third_party/x86inc/x86inc.asm | 1649 ---- thirdparty/libvpx/vp8/common/alloccommon.c | 190 - thirdparty/libvpx/vp8/common/alloccommon.h | 31 - thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c | 181 - .../vp8/common/arm/neon/bilinearpredict_neon.c | 591 -- .../libvpx/vp8/common/arm/neon/copymem_neon.c | 59 - .../vp8/common/arm/neon/dc_only_idct_add_neon.c | 42 - .../libvpx/vp8/common/arm/neon/dequant_idct_neon.c | 142 - .../libvpx/vp8/common/arm/neon/dequantizeb_neon.c | 25 - .../libvpx/vp8/common/arm/neon/idct_blk_neon.c | 96 - .../vp8/common/arm/neon/idct_dequant_0_2x_neon.c | 63 - .../common/arm/neon/idct_dequant_full_2x_neon.c | 185 - .../libvpx/vp8/common/arm/neon/iwalsh_neon.c | 102 - .../arm/neon/loopfiltersimplehorizontaledge_neon.c | 111 - .../arm/neon/loopfiltersimpleverticaledge_neon.c | 283 - .../libvpx/vp8/common/arm/neon/mbloopfilter_neon.c | 625 -- .../vp8/common/arm/neon/shortidct4x4llm_neon.c | 123 - .../vp8/common/arm/neon/sixtappredict_neon.c | 1377 ---- .../vp8/common/arm/neon/vp8_loopfilter_neon.c | 550 -- thirdparty/libvpx/vp8/common/blockd.c | 22 - thirdparty/libvpx/vp8/common/blockd.h | 312 - thirdparty/libvpx/vp8/common/coefupdateprobs.h | 197 - thirdparty/libvpx/vp8/common/common.h | 48 - thirdparty/libvpx/vp8/common/copy_c.c | 32 - thirdparty/libvpx/vp8/common/debugmodes.c | 155 - thirdparty/libvpx/vp8/common/default_coef_probs.h | 200 - thirdparty/libvpx/vp8/common/dequantize.c | 43 - thirdparty/libvpx/vp8/common/entropy.c | 188 - thirdparty/libvpx/vp8/common/entropy.h | 109 - thirdparty/libvpx/vp8/common/entropymode.c | 171 - thirdparty/libvpx/vp8/common/entropymode.h | 88 - thirdparty/libvpx/vp8/common/entropymv.c | 49 - thirdparty/libvpx/vp8/common/entropymv.h | 52 - thirdparty/libvpx/vp8/common/extend.c | 188 - thirdparty/libvpx/vp8/common/extend.h | 33 - thirdparty/libvpx/vp8/common/filter.c | 493 -- thirdparty/libvpx/vp8/common/filter.h | 32 - thirdparty/libvpx/vp8/common/findnearmv.c | 193 - thirdparty/libvpx/vp8/common/findnearmv.h | 195 - .../libvpx/vp8/common/generic/systemdependent.c | 106 - thirdparty/libvpx/vp8/common/header.h | 51 - thirdparty/libvpx/vp8/common/idct_blk.c | 90 - thirdparty/libvpx/vp8/common/idctllm.c | 205 - thirdparty/libvpx/vp8/common/invtrans.h | 70 - thirdparty/libvpx/vp8/common/loopfilter.h | 113 - thirdparty/libvpx/vp8/common/loopfilter_filters.c | 430 -- thirdparty/libvpx/vp8/common/mbpitch.c | 68 - thirdparty/libvpx/vp8/common/modecont.c | 40 - thirdparty/libvpx/vp8/common/modecont.h | 25 - thirdparty/libvpx/vp8/common/mv.h | 36 - thirdparty/libvpx/vp8/common/onyxc_int.h | 185 - thirdparty/libvpx/vp8/common/onyxd.h | 63 - thirdparty/libvpx/vp8/common/ppflags.h | 49 - thirdparty/libvpx/vp8/common/quant_common.c | 135 - thirdparty/libvpx/vp8/common/quant_common.h | 34 - thirdparty/libvpx/vp8/common/reconinter.c | 544 -- thirdparty/libvpx/vp8/common/reconinter.h | 43 - thirdparty/libvpx/vp8/common/reconintra.c | 117 - thirdparty/libvpx/vp8/common/reconintra.h | 44 - thirdparty/libvpx/vp8/common/reconintra4x4.c | 54 - thirdparty/libvpx/vp8/common/reconintra4x4.h | 48 - thirdparty/libvpx/vp8/common/rtcd.c | 19 - thirdparty/libvpx/vp8/common/setupintrarecon.c | 39 - thirdparty/libvpx/vp8/common/setupintrarecon.h | 45 - thirdparty/libvpx/vp8/common/swapyv12buffer.c | 34 - thirdparty/libvpx/vp8/common/swapyv12buffer.h | 27 - thirdparty/libvpx/vp8/common/systemdependent.h | 27 - thirdparty/libvpx/vp8/common/threading.h | 232 - thirdparty/libvpx/vp8/common/treecoder.c | 143 - thirdparty/libvpx/vp8/common/treecoder.h | 98 - thirdparty/libvpx/vp8/common/vp8_entropymodedata.h | 254 - thirdparty/libvpx/vp8/common/vp8_loopfilter.c | 661 -- thirdparty/libvpx/vp8/common/x86/copy_sse2.asm | 93 - thirdparty/libvpx/vp8/common/x86/copy_sse3.asm | 146 - .../libvpx/vp8/common/x86/dequantize_mmx.asm | 258 - thirdparty/libvpx/vp8/common/x86/filter_x86.c | 35 - thirdparty/libvpx/vp8/common/x86/filter_x86.h | 33 - thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c | 128 - thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c | 89 - thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm | 295 - thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm | 708 -- thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm | 140 - thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm | 121 - .../common/x86/loopfilter_block_sse2_x86_64.asm | 815 -- .../libvpx/vp8/common/x86/loopfilter_sse2.asm | 1640 ---- thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c | 198 - thirdparty/libvpx/vp8/common/x86/recon_mmx.asm | 274 - thirdparty/libvpx/vp8/common/x86/recon_sse2.asm | 116 - thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm | 702 -- thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm | 1372 ---- .../libvpx/vp8/common/x86/subpixel_ssse3.asm | 1508 ---- thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c | 625 -- .../libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm | 1753 ----- thirdparty/libvpx/vp8/decoder/dboolhuff.c | 77 - thirdparty/libvpx/vp8/decoder/dboolhuff.h | 141 - thirdparty/libvpx/vp8/decoder/decodeframe.c | 1397 ---- thirdparty/libvpx/vp8/decoder/decodemv.c | 670 -- thirdparty/libvpx/vp8/decoder/decodemv.h | 26 - thirdparty/libvpx/vp8/decoder/decoderthreading.h | 30 - thirdparty/libvpx/vp8/decoder/detokenize.c | 245 - thirdparty/libvpx/vp8/decoder/detokenize.h | 27 - thirdparty/libvpx/vp8/decoder/onyxd_if.c | 521 -- thirdparty/libvpx/vp8/decoder/onyxd_int.h | 161 - thirdparty/libvpx/vp8/decoder/threading.c | 928 --- thirdparty/libvpx/vp8/decoder/treereader.h | 49 - thirdparty/libvpx/vp8/vp8_dx_iface.c | 828 -- thirdparty/libvpx/vp8_rtcd.h | 9 - .../vp9/common/arm/neon/vp9_iht4x4_add_neon.c | 248 - .../vp9/common/arm/neon/vp9_iht8x8_add_neon.c | 624 -- thirdparty/libvpx/vp9/common/vp9_alloccommon.c | 201 - thirdparty/libvpx/vp9/common/vp9_alloccommon.h | 45 - thirdparty/libvpx/vp9/common/vp9_blockd.c | 135 - thirdparty/libvpx/vp9/common/vp9_blockd.h | 305 - thirdparty/libvpx/vp9/common/vp9_common.h | 74 - thirdparty/libvpx/vp9/common/vp9_common_data.c | 176 - thirdparty/libvpx/vp9/common/vp9_common_data.h | 44 - thirdparty/libvpx/vp9/common/vp9_debugmodes.c | 91 - thirdparty/libvpx/vp9/common/vp9_entropy.c | 802 -- thirdparty/libvpx/vp9/common/vp9_entropy.h | 200 - thirdparty/libvpx/vp9/common/vp9_entropymode.c | 469 -- thirdparty/libvpx/vp9/common/vp9_entropymode.h | 107 - thirdparty/libvpx/vp9/common/vp9_entropymv.c | 210 - thirdparty/libvpx/vp9/common/vp9_entropymv.h | 140 - thirdparty/libvpx/vp9/common/vp9_enums.h | 147 - thirdparty/libvpx/vp9/common/vp9_filter.c | 104 - thirdparty/libvpx/vp9/common/vp9_filter.h | 42 - thirdparty/libvpx/vp9/common/vp9_frame_buffers.c | 86 - thirdparty/libvpx/vp9/common/vp9_frame_buffers.h | 53 - thirdparty/libvpx/vp9/common/vp9_idct.c | 405 - thirdparty/libvpx/vp9/common/vp9_idct.h | 81 - thirdparty/libvpx/vp9/common/vp9_loopfilter.c | 1697 ----- thirdparty/libvpx/vp9/common/vp9_loopfilter.h | 166 - thirdparty/libvpx/vp9/common/vp9_mv.h | 55 - thirdparty/libvpx/vp9/common/vp9_mvref_common.c | 201 - thirdparty/libvpx/vp9/common/vp9_mvref_common.h | 241 - thirdparty/libvpx/vp9/common/vp9_onyxc_int.h | 446 -- thirdparty/libvpx/vp9/common/vp9_ppflags.h | 43 - thirdparty/libvpx/vp9/common/vp9_pred_common.c | 314 - thirdparty/libvpx/vp9/common/vp9_pred_common.h | 192 - thirdparty/libvpx/vp9/common/vp9_quant_common.c | 278 - thirdparty/libvpx/vp9/common/vp9_quant_common.h | 36 - thirdparty/libvpx/vp9/common/vp9_reconinter.c | 309 - thirdparty/libvpx/vp9/common/vp9_reconinter.h | 119 - thirdparty/libvpx/vp9/common/vp9_reconintra.c | 445 -- thirdparty/libvpx/vp9/common/vp9_reconintra.h | 32 - thirdparty/libvpx/vp9/common/vp9_rtcd.c | 19 - thirdparty/libvpx/vp9/common/vp9_scale.c | 175 - thirdparty/libvpx/vp9/common/vp9_scale.h | 75 - thirdparty/libvpx/vp9/common/vp9_scan.c | 725 -- thirdparty/libvpx/vp9/common/vp9_scan.h | 57 - thirdparty/libvpx/vp9/common/vp9_seg_common.c | 64 - thirdparty/libvpx/vp9/common/vp9_seg_common.h | 87 - thirdparty/libvpx/vp9/common/vp9_thread_common.c | 435 -- thirdparty/libvpx/vp9/common/vp9_thread_common.h | 65 - thirdparty/libvpx/vp9/common/vp9_tile_common.c | 59 - thirdparty/libvpx/vp9/common/vp9_tile_common.h | 40 - .../libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c | 181 - thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c | 2271 ------ thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h | 37 - thirdparty/libvpx/vp9/decoder/vp9_decodemv.c | 911 --- thirdparty/libvpx/vp9/decoder/vp9_decodemv.h | 30 - thirdparty/libvpx/vp9/decoder/vp9_decoder.c | 518 -- thirdparty/libvpx/vp9/decoder/vp9_decoder.h | 137 - thirdparty/libvpx/vp9/decoder/vp9_detokenize.c | 224 - thirdparty/libvpx/vp9/decoder/vp9_detokenize.h | 33 - thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c | 76 - thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h | 27 - thirdparty/libvpx/vp9/decoder/vp9_dthread.c | 189 - thirdparty/libvpx/vp9/decoder/vp9_dthread.h | 74 - thirdparty/libvpx/vp9/vp9_dx_iface.c | 1093 --- thirdparty/libvpx/vp9/vp9_dx_iface.h | 65 - thirdparty/libvpx/vp9/vp9_iface_common.h | 136 - thirdparty/libvpx/vp9_rtcd.h | 9 - .../libvpx/vpx/internal/vpx_codec_internal.h | 445 -- thirdparty/libvpx/vpx/internal/vpx_psnr.h | 34 - thirdparty/libvpx/vpx/src/vpx_codec.c | 158 - thirdparty/libvpx/vpx/src/vpx_decoder.c | 197 - thirdparty/libvpx/vpx/src/vpx_image.c | 285 - thirdparty/libvpx/vpx/src/vpx_psnr.c | 24 - thirdparty/libvpx/vpx/vp8.h | 148 - thirdparty/libvpx/vpx/vp8dx.h | 176 - thirdparty/libvpx/vpx/vpx_codec.h | 479 -- thirdparty/libvpx/vpx/vpx_decoder.h | 378 - thirdparty/libvpx/vpx/vpx_encoder.h | 1043 --- thirdparty/libvpx/vpx/vpx_frame_buffer.h | 83 - thirdparty/libvpx/vpx/vpx_image.h | 235 - thirdparty/libvpx/vpx/vpx_integer.h | 74 - thirdparty/libvpx/vpx_config.asm | 65 - thirdparty/libvpx/vpx_config.h | 140 - .../vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm | 643 -- .../vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm | 641 -- .../libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm | 39 - .../libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s | 658 -- .../libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s | 647 -- thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s | 44 - .../vpx_dsp/arm/gas_apple/intrapred_neon_asm.s | 660 -- .../vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s | 649 -- .../libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s | 46 - .../libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c | 61 - thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c | 1317 ---- thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c | 185 - .../libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c | 165 - thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c | 719 -- thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c | 50 - thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c | 151 - thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c | 64 - thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c | 540 -- thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c | 822 -- thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c | 179 - thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c | 266 - thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c | 445 -- thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c | 58 - .../libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c | 373 - thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c | 340 - .../libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c | 147 - .../libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c | 94 - thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c | 72 - thirdparty/libvpx/vpx_dsp/bitreader.c | 103 - thirdparty/libvpx/vpx_dsp/bitreader.h | 140 - thirdparty/libvpx/vpx_dsp/bitreader_buffer.c | 53 - thirdparty/libvpx/vpx_dsp/bitreader_buffer.h | 47 - thirdparty/libvpx/vpx_dsp/intrapred.c | 870 --- thirdparty/libvpx/vpx_dsp/inv_txfm.c | 2518 ------ thirdparty/libvpx/vpx_dsp/inv_txfm.h | 133 - thirdparty/libvpx/vpx_dsp/loopfilter.c | 767 -- thirdparty/libvpx/vpx_dsp/prob.c | 53 - thirdparty/libvpx/vpx_dsp/prob.h | 103 - thirdparty/libvpx/vpx_dsp/txfm_common.h | 66 - thirdparty/libvpx/vpx_dsp/vpx_convolve.c | 612 -- thirdparty/libvpx/vpx_dsp/vpx_convolve.h | 38 - thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h | 69 - thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c | 17 - thirdparty/libvpx/vpx_dsp/vpx_filter.h | 34 - thirdparty/libvpx/vpx_dsp/x86/convolve.h | 274 - thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm | 860 --- thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm | 871 --- thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c | 4046 ---------- thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h | 196 - .../libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm | 1793 ----- thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm | 109 - thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c | 979 --- thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c | 1776 ----- thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h | 29 - thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c | 162 - .../libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm | 228 - .../vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c | 606 -- .../vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c | 915 --- .../libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm | 987 --- .../libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm | 629 -- .../vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm | 448 -- .../vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm | 422 - thirdparty/libvpx/vpx_dsp_rtcd.h | 9 - thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h | 31 - thirdparty/libvpx/vpx_mem/vpx_mem.c | 100 - thirdparty/libvpx/vpx_mem/vpx_mem.h | 47 - thirdparty/libvpx/vpx_ports/arm.h | 41 - thirdparty/libvpx/vpx_ports/arm_cpudetect.c | 170 - thirdparty/libvpx/vpx_ports/bitops.h | 76 - thirdparty/libvpx/vpx_ports/config.h | 16 - thirdparty/libvpx/vpx_ports/emmintrin_compat.h | 55 - thirdparty/libvpx/vpx_ports/emms.asm | 38 - thirdparty/libvpx/vpx_ports/mem.h | 53 - thirdparty/libvpx/vpx_ports/mem_ops.h | 226 - thirdparty/libvpx/vpx_ports/mem_ops_aligned.h | 169 - thirdparty/libvpx/vpx_ports/msvc.h | 32 - thirdparty/libvpx/vpx_ports/system_state.h | 22 - thirdparty/libvpx/vpx_ports/vpx_once.h | 150 - thirdparty/libvpx/vpx_ports/vpx_timer.h | 120 - thirdparty/libvpx/vpx_ports/x86.h | 330 - thirdparty/libvpx/vpx_ports/x86_abi_support.asm | 404 - thirdparty/libvpx/vpx_scale/generic/yv12config.c | 287 - thirdparty/libvpx/vpx_scale/generic/yv12extend.c | 324 - thirdparty/libvpx/vpx_scale/vpx_scale.h | 27 - thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c | 18 - thirdparty/libvpx/vpx_scale/yv12config.h | 105 - thirdparty/libvpx/vpx_scale_rtcd.h | 44 - thirdparty/libvpx/vpx_util/endian_inl.h | 120 - thirdparty/libvpx/vpx_util/vpx_thread.c | 184 - thirdparty/libvpx/vpx_util/vpx_thread.h | 369 - thirdparty/libvpx/vpx_version.h | 7 - thirdparty/opus/COPYING | 44 - thirdparty/opus/analysis.c | 672 -- thirdparty/opus/analysis.h | 103 - thirdparty/opus/celt/_kiss_fft_guts.h | 182 - thirdparty/opus/celt/arch.h | 252 - thirdparty/opus/celt/arm/arm_celt_map.c | 143 - thirdparty/opus/celt/arm/armcpu.c | 185 - thirdparty/opus/celt/arm/armcpu.h | 77 - thirdparty/opus/celt/arm/armopts.s.in | 37 - thirdparty/opus/celt/arm/celt_ne10_fft.c | 174 - thirdparty/opus/celt/arm/celt_ne10_mdct.c | 258 - thirdparty/opus/celt/arm/celt_neon_intr.c | 311 - .../opus/celt/arm/celt_pitch_xcorr_arm-gnu.S | 551 -- thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s | 547 -- thirdparty/opus/celt/arm/fft_arm.h | 72 - thirdparty/opus/celt/arm/fixed_arm64.h | 35 - thirdparty/opus/celt/arm/fixed_armv4.h | 80 - thirdparty/opus/celt/arm/fixed_armv5e.h | 151 - thirdparty/opus/celt/arm/kiss_fft_armv4.h | 121 - thirdparty/opus/celt/arm/kiss_fft_armv5e.h | 118 - thirdparty/opus/celt/arm/mdct_arm.h | 60 - thirdparty/opus/celt/arm/pitch_arm.h | 126 - thirdparty/opus/celt/bands.c | 1529 ---- thirdparty/opus/celt/bands.h | 120 - thirdparty/opus/celt/celt.c | 299 - thirdparty/opus/celt/celt.h | 229 - thirdparty/opus/celt/celt_decoder.c | 1248 --- thirdparty/opus/celt/celt_encoder.c | 2410 ------ thirdparty/opus/celt/celt_lpc.c | 314 - thirdparty/opus/celt/celt_lpc.h | 67 - thirdparty/opus/celt/cpu_support.h | 70 - thirdparty/opus/celt/cwrs.c | 715 -- thirdparty/opus/celt/cwrs.h | 48 - thirdparty/opus/celt/ecintrin.h | 87 - thirdparty/opus/celt/entcode.c | 153 - thirdparty/opus/celt/entcode.h | 152 - thirdparty/opus/celt/entdec.c | 245 - thirdparty/opus/celt/entdec.h | 100 - thirdparty/opus/celt/entenc.c | 294 - thirdparty/opus/celt/entenc.h | 110 - thirdparty/opus/celt/fixed_debug.h | 784 -- thirdparty/opus/celt/fixed_generic.h | 167 - thirdparty/opus/celt/float_cast.h | 140 - thirdparty/opus/celt/kiss_fft.c | 604 -- thirdparty/opus/celt/kiss_fft.h | 200 - thirdparty/opus/celt/laplace.c | 134 - thirdparty/opus/celt/laplace.h | 48 - thirdparty/opus/celt/mathops.c | 208 - thirdparty/opus/celt/mathops.h | 258 - thirdparty/opus/celt/mdct.c | 343 - thirdparty/opus/celt/mdct.h | 112 - thirdparty/opus/celt/mfrngcod.h | 48 - thirdparty/opus/celt/mips/celt_mipsr1.h | 151 - thirdparty/opus/celt/mips/fixed_generic_mipsr1.h | 126 - thirdparty/opus/celt/mips/kiss_fft_mipsr1.h | 167 - thirdparty/opus/celt/mips/mdct_mipsr1.h | 288 - thirdparty/opus/celt/mips/pitch_mipsr1.h | 161 - thirdparty/opus/celt/mips/vq_mipsr1.h | 125 - thirdparty/opus/celt/modes.c | 442 -- thirdparty/opus/celt/modes.h | 75 - thirdparty/opus/celt/opus_custom_demo.c | 210 - thirdparty/opus/celt/os_support.h | 92 - thirdparty/opus/celt/pitch.c | 557 -- thirdparty/opus/celt/pitch.h | 200 - thirdparty/opus/celt/quant_bands.c | 556 -- thirdparty/opus/celt/quant_bands.h | 66 - thirdparty/opus/celt/rate.c | 639 -- thirdparty/opus/celt/rate.h | 101 - thirdparty/opus/celt/stack_alloc.h | 184 - thirdparty/opus/celt/static_modes_fixed.h | 892 --- thirdparty/opus/celt/static_modes_fixed_arm_ne10.h | 388 - thirdparty/opus/celt/static_modes_float.h | 888 --- thirdparty/opus/celt/static_modes_float_arm_ne10.h | 404 - thirdparty/opus/celt/tests/test_unit_cwrs32.c | 161 - thirdparty/opus/celt/tests/test_unit_dft.c | 189 - thirdparty/opus/celt/tests/test_unit_entropy.c | 382 - thirdparty/opus/celt/tests/test_unit_laplace.c | 93 - thirdparty/opus/celt/tests/test_unit_mathops.c | 304 - thirdparty/opus/celt/tests/test_unit_mdct.c | 230 - thirdparty/opus/celt/tests/test_unit_rotation.c | 120 - thirdparty/opus/celt/tests/test_unit_types.c | 50 - thirdparty/opus/celt/vq.c | 408 - thirdparty/opus/celt/vq.h | 75 - thirdparty/opus/celt/x86/celt_lpc_sse.c | 132 - thirdparty/opus/celt/x86/celt_lpc_sse.h | 68 - thirdparty/opus/celt/x86/pitch_sse.c | 185 - thirdparty/opus/celt/x86/pitch_sse.h | 192 - thirdparty/opus/celt/x86/pitch_sse2.c | 95 - thirdparty/opus/celt/x86/pitch_sse4_1.c | 195 - thirdparty/opus/celt/x86/x86_celt_map.c | 155 - thirdparty/opus/celt/x86/x86cpu.c | 157 - thirdparty/opus/celt/x86/x86cpu.h | 93 - thirdparty/opus/config.h | 133 - thirdparty/opus/info.c | 758 -- thirdparty/opus/internal.c | 42 - thirdparty/opus/internal.h | 254 - thirdparty/opus/mlp.c | 145 - thirdparty/opus/mlp.h | 43 - thirdparty/opus/mlp_data.c | 109 - thirdparty/opus/opus.c | 356 - thirdparty/opus/opus/opus.h | 981 --- thirdparty/opus/opus/opus_custom.h | 342 - thirdparty/opus/opus/opus_defines.h | 753 -- thirdparty/opus/opus/opus_multistream.h | 660 -- thirdparty/opus/opus/opus_types.h | 159 - thirdparty/opus/opus/opusfile.h | 2157 ------ thirdparty/opus/opus_compare.c | 379 - thirdparty/opus/opus_decoder.c | 981 --- thirdparty/opus/opus_encoder.c | 2536 ------ thirdparty/opus/opus_multistream.c | 92 - thirdparty/opus/opus_multistream_decoder.c | 537 -- thirdparty/opus/opus_multistream_encoder.c | 1351 ---- thirdparty/opus/opus_private.h | 134 - thirdparty/opus/opusfile.c | 3266 -------- thirdparty/opus/repacketizer.c | 348 - thirdparty/opus/repacketizer_demo.c | 217 - thirdparty/opus/silk/A2NLSF.c | 267 - thirdparty/opus/silk/API.h | 134 - thirdparty/opus/silk/CNG.c | 184 - thirdparty/opus/silk/HP_variable_cutoff.c | 77 - thirdparty/opus/silk/Inlines.h | 188 - thirdparty/opus/silk/LPC_analysis_filter.c | 108 - thirdparty/opus/silk/LPC_inv_pred_gain.c | 154 - thirdparty/opus/silk/LP_variable_cutoff.c | 135 - thirdparty/opus/silk/MacroCount.h | 718 -- thirdparty/opus/silk/MacroDebug.h | 952 --- thirdparty/opus/silk/NLSF2A.c | 178 - thirdparty/opus/silk/NLSF_VQ.c | 68 - thirdparty/opus/silk/NLSF_VQ_weights_laroia.c | 80 - thirdparty/opus/silk/NLSF_decode.c | 101 - thirdparty/opus/silk/NLSF_del_dec_quant.c | 217 - thirdparty/opus/silk/NLSF_encode.c | 137 - thirdparty/opus/silk/NLSF_stabilize.c | 142 - thirdparty/opus/silk/NLSF_unpack.c | 55 - thirdparty/opus/silk/NSQ.c | 429 -- thirdparty/opus/silk/NSQ.h | 101 - thirdparty/opus/silk/NSQ_del_dec.c | 716 -- thirdparty/opus/silk/PLC.c | 446 -- thirdparty/opus/silk/PLC.h | 62 - thirdparty/opus/silk/SigProc_FIX.h | 615 -- thirdparty/opus/silk/VAD.c | 362 - thirdparty/opus/silk/VQ_WMat_EC.c | 120 - thirdparty/opus/silk/ana_filt_bank_1.c | 74 - thirdparty/opus/silk/arm/NSQ_neon.c | 112 - thirdparty/opus/silk/arm/NSQ_neon.h | 113 - thirdparty/opus/silk/arm/SigProc_FIX_armv4.h | 47 - thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h | 61 - thirdparty/opus/silk/arm/arm_silk_map.c | 55 - thirdparty/opus/silk/arm/macros_arm64.h | 39 - thirdparty/opus/silk/arm/macros_armv4.h | 103 - thirdparty/opus/silk/arm/macros_armv5e.h | 213 - thirdparty/opus/silk/biquad_alt.c | 78 - thirdparty/opus/silk/bwexpander.c | 51 - thirdparty/opus/silk/bwexpander_32.c | 50 - thirdparty/opus/silk/check_control_input.c | 106 - thirdparty/opus/silk/code_signs.c | 115 - thirdparty/opus/silk/control.h | 142 - thirdparty/opus/silk/control_SNR.c | 76 - thirdparty/opus/silk/control_audio_bandwidth.c | 126 - thirdparty/opus/silk/control_codec.c | 428 -- thirdparty/opus/silk/debug.c | 170 - thirdparty/opus/silk/debug.h | 279 - thirdparty/opus/silk/dec_API.c | 419 - thirdparty/opus/silk/decode_core.c | 239 - thirdparty/opus/silk/decode_frame.c | 129 - thirdparty/opus/silk/decode_indices.c | 151 - thirdparty/opus/silk/decode_parameters.c | 115 - thirdparty/opus/silk/decode_pitch.c | 77 - thirdparty/opus/silk/decode_pulses.c | 115 - thirdparty/opus/silk/decoder_set_fs.c | 108 - thirdparty/opus/silk/define.h | 235 - thirdparty/opus/silk/enc_API.c | 563 -- thirdparty/opus/silk/encode_indices.c | 181 - thirdparty/opus/silk/encode_pulses.c | 206 - thirdparty/opus/silk/errors.h | 98 - .../opus/silk/fixed/LTP_analysis_filter_FIX.c | 90 - thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c | 53 - thirdparty/opus/silk/fixed/apply_sine_window_FIX.c | 101 - thirdparty/opus/silk/fixed/autocorr_FIX.c | 48 - thirdparty/opus/silk/fixed/burg_modified_FIX.c | 280 - thirdparty/opus/silk/fixed/corrMatrix_FIX.c | 158 - thirdparty/opus/silk/fixed/encode_frame_FIX.c | 387 - thirdparty/opus/silk/fixed/find_LPC_FIX.c | 151 - thirdparty/opus/silk/fixed/find_LTP_FIX.c | 245 - thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c | 145 - thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c | 148 - thirdparty/opus/silk/fixed/k2a_FIX.c | 53 - thirdparty/opus/silk/fixed/k2a_Q16_FIX.c | 53 - thirdparty/opus/silk/fixed/main_FIX.h | 272 - .../fixed/mips/noise_shape_analysis_FIX_mipsr1.h | 336 - .../opus/silk/fixed/mips/prefilter_FIX_mipsr1.h | 184 - .../fixed/mips/warped_autocorrelation_FIX_mipsr1.h | 165 - .../opus/silk/fixed/noise_shape_analysis_FIX.c | 451 -- .../opus/silk/fixed/pitch_analysis_core_FIX.c | 746 -- thirdparty/opus/silk/fixed/prefilter_FIX.c | 221 - thirdparty/opus/silk/fixed/process_gains_FIX.c | 117 - .../opus/silk/fixed/regularize_correlations_FIX.c | 47 - thirdparty/opus/silk/fixed/residual_energy16_FIX.c | 103 - thirdparty/opus/silk/fixed/residual_energy_FIX.c | 98 - thirdparty/opus/silk/fixed/schur64_FIX.c | 92 - thirdparty/opus/silk/fixed/schur_FIX.c | 106 - thirdparty/opus/silk/fixed/solve_LS_FIX.c | 249 - thirdparty/opus/silk/fixed/structs_FIX.h | 134 - thirdparty/opus/silk/fixed/vector_ops_FIX.c | 102 - .../opus/silk/fixed/warped_autocorrelation_FIX.c | 95 - .../opus/silk/fixed/x86/burg_modified_FIX_sse.c | 377 - thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c | 160 - .../opus/silk/fixed/x86/vector_ops_FIX_sse.c | 88 - .../opus/silk/float/LPC_analysis_filter_FLP.c | 249 - thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c | 76 - .../opus/silk/float/LTP_analysis_filter_FLP.c | 75 - thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c | 52 - thirdparty/opus/silk/float/SigProc_FLP.h | 204 - thirdparty/opus/silk/float/apply_sine_window_FLP.c | 81 - thirdparty/opus/silk/float/autocorrelation_FLP.c | 52 - thirdparty/opus/silk/float/burg_modified_FLP.c | 186 - thirdparty/opus/silk/float/bwexpander_FLP.c | 49 - thirdparty/opus/silk/float/corrMatrix_FLP.c | 93 - thirdparty/opus/silk/float/encode_frame_FLP.c | 372 - thirdparty/opus/silk/float/energy_FLP.c | 60 - thirdparty/opus/silk/float/find_LPC_FLP.c | 104 - thirdparty/opus/silk/float/find_LTP_FLP.c | 132 - thirdparty/opus/silk/float/find_pitch_lags_FLP.c | 132 - thirdparty/opus/silk/float/find_pred_coefs_FLP.c | 118 - thirdparty/opus/silk/float/inner_product_FLP.c | 60 - thirdparty/opus/silk/float/k2a_FLP.c | 53 - thirdparty/opus/silk/float/levinsondurbin_FLP.c | 81 - thirdparty/opus/silk/float/main_FLP.h | 313 - .../opus/silk/float/noise_shape_analysis_FLP.c | 365 - .../opus/silk/float/pitch_analysis_core_FLP.c | 630 -- thirdparty/opus/silk/float/prefilter_FLP.c | 206 - thirdparty/opus/silk/float/process_gains_FLP.c | 103 - .../opus/silk/float/regularize_correlations_FLP.c | 48 - thirdparty/opus/silk/float/residual_energy_FLP.c | 117 - thirdparty/opus/silk/float/scale_copy_vector_FLP.c | 57 - thirdparty/opus/silk/float/scale_vector_FLP.c | 56 - thirdparty/opus/silk/float/schur_FLP.c | 70 - thirdparty/opus/silk/float/solve_LS_FLP.c | 207 - thirdparty/opus/silk/float/sort_FLP.c | 83 - thirdparty/opus/silk/float/structs_FLP.h | 132 - .../opus/silk/float/warped_autocorrelation_FLP.c | 73 - thirdparty/opus/silk/float/wrappers_FLP.c | 202 - thirdparty/opus/silk/gain_quant.c | 141 - thirdparty/opus/silk/init_decoder.c | 56 - thirdparty/opus/silk/init_encoder.c | 64 - thirdparty/opus/silk/inner_prod_aligned.c | 47 - thirdparty/opus/silk/interpolate.c | 51 - thirdparty/opus/silk/lin2log.c | 46 - thirdparty/opus/silk/log2lin.c | 58 - thirdparty/opus/silk/macros.h | 159 - thirdparty/opus/silk/main.h | 471 -- thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h | 409 - thirdparty/opus/silk/mips/macros_mipsr1.h | 92 - thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h | 65 - thirdparty/opus/silk/pitch_est_defines.h | 88 - thirdparty/opus/silk/pitch_est_tables.c | 99 - thirdparty/opus/silk/process_NLSFs.c | 107 - thirdparty/opus/silk/quant_LTP_gains.c | 129 - thirdparty/opus/silk/resampler.c | 215 - thirdparty/opus/silk/resampler_down2.c | 74 - thirdparty/opus/silk/resampler_down2_3.c | 103 - thirdparty/opus/silk/resampler_private.h | 88 - thirdparty/opus/silk/resampler_private_AR2.c | 55 - thirdparty/opus/silk/resampler_private_IIR_FIR.c | 107 - thirdparty/opus/silk/resampler_private_down_FIR.c | 194 - thirdparty/opus/silk/resampler_private_up2_HQ.c | 113 - thirdparty/opus/silk/resampler_rom.c | 96 - thirdparty/opus/silk/resampler_rom.h | 68 - thirdparty/opus/silk/resampler_structs.h | 60 - thirdparty/opus/silk/shell_coder.c | 151 - thirdparty/opus/silk/sigm_Q15.c | 76 - thirdparty/opus/silk/sort.c | 154 - thirdparty/opus/silk/stereo_LR_to_MS.c | 229 - thirdparty/opus/silk/stereo_MS_to_LR.c | 85 - thirdparty/opus/silk/stereo_decode_pred.c | 73 - thirdparty/opus/silk/stereo_encode_pred.c | 62 - thirdparty/opus/silk/stereo_find_predictor.c | 79 - thirdparty/opus/silk/stereo_quant_pred.c | 73 - thirdparty/opus/silk/structs.h | 327 - thirdparty/opus/silk/sum_sqr_shift.c | 86 - thirdparty/opus/silk/table_LSF_cos.c | 70 - thirdparty/opus/silk/tables.h | 122 - thirdparty/opus/silk/tables_LTP.c | 296 - thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c | 159 - thirdparty/opus/silk/tables_NLSF_CB_WB.c | 198 - thirdparty/opus/silk/tables_gain.c | 63 - thirdparty/opus/silk/tables_other.c | 138 - thirdparty/opus/silk/tables_pitch_lag.c | 69 - thirdparty/opus/silk/tables_pulses_per_block.c | 264 - thirdparty/opus/silk/tuning_parameters.h | 171 - thirdparty/opus/silk/typedef.h | 78 - thirdparty/opus/silk/x86/NSQ_del_dec_sse.c | 857 --- thirdparty/opus/silk/x86/NSQ_sse.c | 720 -- thirdparty/opus/silk/x86/SigProc_FIX_sse.h | 94 - thirdparty/opus/silk/x86/VAD_sse.c | 277 - thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c | 142 - thirdparty/opus/silk/x86/main_sse.h | 277 - thirdparty/opus/silk/x86/x86_silk_map.c | 174 - thirdparty/opus/stream.c | 366 - thirdparty/opus/tansig_table.h | 45 - 634 files changed, 6 insertions(+), 176465 deletions(-) delete mode 100644 modules/opus/SCsub delete mode 100644 modules/opus/config.py delete mode 100644 modules/opus/register_types.cpp delete mode 100644 modules/opus/register_types.h delete mode 100644 modules/webm/SCsub delete mode 100644 modules/webm/config.py delete mode 100644 modules/webm/doc_classes/VideoStreamWebm.xml delete mode 100644 modules/webm/libvpx/SCsub delete mode 100644 modules/webm/register_types.cpp delete mode 100644 modules/webm/register_types.h delete mode 100644 modules/webm/video_stream_webm.cpp delete mode 100644 modules/webm/video_stream_webm.h delete mode 100644 thirdparty/libsimplewebm/LICENSE delete mode 100644 thirdparty/libsimplewebm/OpusVorbisDecoder.cpp delete mode 100644 thirdparty/libsimplewebm/OpusVorbisDecoder.hpp delete mode 100644 thirdparty/libsimplewebm/VPXDecoder.cpp delete mode 100644 thirdparty/libsimplewebm/VPXDecoder.hpp delete mode 100644 thirdparty/libsimplewebm/WebMDemuxer.cpp delete mode 100644 thirdparty/libsimplewebm/WebMDemuxer.hpp delete mode 100644 thirdparty/libsimplewebm/libwebm/AUTHORS.TXT delete mode 100644 thirdparty/libsimplewebm/libwebm/LICENSE.TXT delete mode 100644 thirdparty/libsimplewebm/libwebm/PATENTS.TXT delete mode 100644 thirdparty/libsimplewebm/libwebm/README.libvpx delete mode 100644 thirdparty/libsimplewebm/libwebm/common/webmids.h delete mode 100644 thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h delete mode 100644 thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc delete mode 100644 thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h delete mode 100644 thirdparty/libvpx/AUTHORS delete mode 100644 thirdparty/libvpx/CHANGELOG delete mode 100644 thirdparty/libvpx/LICENSE delete mode 100644 thirdparty/libvpx/PATENTS delete mode 100644 thirdparty/libvpx/rtcd/vp8_rtcd_arm.h delete mode 100644 thirdparty/libvpx/rtcd/vp8_rtcd_c.h delete mode 100644 thirdparty/libvpx/rtcd/vp8_rtcd_x86.h delete mode 100644 thirdparty/libvpx/rtcd/vp9_rtcd_arm.h delete mode 100644 thirdparty/libvpx/rtcd/vp9_rtcd_c.h delete mode 100644 thirdparty/libvpx/rtcd/vp9_rtcd_x86.h delete mode 100644 thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h delete mode 100644 thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h delete mode 100644 thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h delete mode 100644 thirdparty/libvpx/third_party/android/cpu-features.c delete mode 100644 thirdparty/libvpx/third_party/android/cpu-features.h delete mode 100644 thirdparty/libvpx/third_party/x86inc/LICENSE delete mode 100644 thirdparty/libvpx/third_party/x86inc/README.libvpx delete mode 100644 thirdparty/libvpx/third_party/x86inc/x86inc.asm delete mode 100644 thirdparty/libvpx/vp8/common/alloccommon.c delete mode 100644 thirdparty/libvpx/vp8/common/alloccommon.h delete mode 100644 thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c delete mode 100644 thirdparty/libvpx/vp8/common/blockd.c delete mode 100644 thirdparty/libvpx/vp8/common/blockd.h delete mode 100644 thirdparty/libvpx/vp8/common/coefupdateprobs.h delete mode 100644 thirdparty/libvpx/vp8/common/common.h delete mode 100644 thirdparty/libvpx/vp8/common/copy_c.c delete mode 100644 thirdparty/libvpx/vp8/common/debugmodes.c delete mode 100644 thirdparty/libvpx/vp8/common/default_coef_probs.h delete mode 100644 thirdparty/libvpx/vp8/common/dequantize.c delete mode 100644 thirdparty/libvpx/vp8/common/entropy.c delete mode 100644 thirdparty/libvpx/vp8/common/entropy.h delete mode 100644 thirdparty/libvpx/vp8/common/entropymode.c delete mode 100644 thirdparty/libvpx/vp8/common/entropymode.h delete mode 100644 thirdparty/libvpx/vp8/common/entropymv.c delete mode 100644 thirdparty/libvpx/vp8/common/entropymv.h delete mode 100644 thirdparty/libvpx/vp8/common/extend.c delete mode 100644 thirdparty/libvpx/vp8/common/extend.h delete mode 100644 thirdparty/libvpx/vp8/common/filter.c delete mode 100644 thirdparty/libvpx/vp8/common/filter.h delete mode 100644 thirdparty/libvpx/vp8/common/findnearmv.c delete mode 100644 thirdparty/libvpx/vp8/common/findnearmv.h delete mode 100644 thirdparty/libvpx/vp8/common/generic/systemdependent.c delete mode 100644 thirdparty/libvpx/vp8/common/header.h delete mode 100644 thirdparty/libvpx/vp8/common/idct_blk.c delete mode 100644 thirdparty/libvpx/vp8/common/idctllm.c delete mode 100644 thirdparty/libvpx/vp8/common/invtrans.h delete mode 100644 thirdparty/libvpx/vp8/common/loopfilter.h delete mode 100644 thirdparty/libvpx/vp8/common/loopfilter_filters.c delete mode 100644 thirdparty/libvpx/vp8/common/mbpitch.c delete mode 100644 thirdparty/libvpx/vp8/common/modecont.c delete mode 100644 thirdparty/libvpx/vp8/common/modecont.h delete mode 100644 thirdparty/libvpx/vp8/common/mv.h delete mode 100644 thirdparty/libvpx/vp8/common/onyxc_int.h delete mode 100644 thirdparty/libvpx/vp8/common/onyxd.h delete mode 100644 thirdparty/libvpx/vp8/common/ppflags.h delete mode 100644 thirdparty/libvpx/vp8/common/quant_common.c delete mode 100644 thirdparty/libvpx/vp8/common/quant_common.h delete mode 100644 thirdparty/libvpx/vp8/common/reconinter.c delete mode 100644 thirdparty/libvpx/vp8/common/reconinter.h delete mode 100644 thirdparty/libvpx/vp8/common/reconintra.c delete mode 100644 thirdparty/libvpx/vp8/common/reconintra.h delete mode 100644 thirdparty/libvpx/vp8/common/reconintra4x4.c delete mode 100644 thirdparty/libvpx/vp8/common/reconintra4x4.h delete mode 100644 thirdparty/libvpx/vp8/common/rtcd.c delete mode 100644 thirdparty/libvpx/vp8/common/setupintrarecon.c delete mode 100644 thirdparty/libvpx/vp8/common/setupintrarecon.h delete mode 100644 thirdparty/libvpx/vp8/common/swapyv12buffer.c delete mode 100644 thirdparty/libvpx/vp8/common/swapyv12buffer.h delete mode 100644 thirdparty/libvpx/vp8/common/systemdependent.h delete mode 100644 thirdparty/libvpx/vp8/common/threading.h delete mode 100644 thirdparty/libvpx/vp8/common/treecoder.c delete mode 100644 thirdparty/libvpx/vp8/common/treecoder.h delete mode 100644 thirdparty/libvpx/vp8/common/vp8_entropymodedata.h delete mode 100644 thirdparty/libvpx/vp8/common/vp8_loopfilter.c delete mode 100644 thirdparty/libvpx/vp8/common/x86/copy_sse2.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/copy_sse3.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/filter_x86.c delete mode 100644 thirdparty/libvpx/vp8/common/x86/filter_x86.h delete mode 100644 thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c delete mode 100644 thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c delete mode 100644 thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c delete mode 100644 thirdparty/libvpx/vp8/common/x86/recon_mmx.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/recon_sse2.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm delete mode 100644 thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c delete mode 100644 thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm delete mode 100644 thirdparty/libvpx/vp8/decoder/dboolhuff.c delete mode 100644 thirdparty/libvpx/vp8/decoder/dboolhuff.h delete mode 100644 thirdparty/libvpx/vp8/decoder/decodeframe.c delete mode 100644 thirdparty/libvpx/vp8/decoder/decodemv.c delete mode 100644 thirdparty/libvpx/vp8/decoder/decodemv.h delete mode 100644 thirdparty/libvpx/vp8/decoder/decoderthreading.h delete mode 100644 thirdparty/libvpx/vp8/decoder/detokenize.c delete mode 100644 thirdparty/libvpx/vp8/decoder/detokenize.h delete mode 100644 thirdparty/libvpx/vp8/decoder/onyxd_if.c delete mode 100644 thirdparty/libvpx/vp8/decoder/onyxd_int.h delete mode 100644 thirdparty/libvpx/vp8/decoder/threading.c delete mode 100644 thirdparty/libvpx/vp8/decoder/treereader.h delete mode 100644 thirdparty/libvpx/vp8/vp8_dx_iface.c delete mode 100644 thirdparty/libvpx/vp8_rtcd.h delete mode 100644 thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c delete mode 100644 thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_alloccommon.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_alloccommon.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_blockd.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_blockd.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_common.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_common_data.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_common_data.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_debugmodes.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_entropy.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_entropy.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymode.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymode.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymv.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymv.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_enums.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_filter.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_filter.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_frame_buffers.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_frame_buffers.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_idct.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_idct.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_loopfilter.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_loopfilter.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_mv.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_mvref_common.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_mvref_common.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_onyxc_int.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_ppflags.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_pred_common.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_pred_common.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_quant_common.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_quant_common.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_reconinter.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_reconinter.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_reconintra.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_reconintra.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_rtcd.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_scale.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_scale.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_scan.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_scan.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_seg_common.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_seg_common.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_thread_common.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_thread_common.h delete mode 100644 thirdparty/libvpx/vp9/common/vp9_tile_common.c delete mode 100644 thirdparty/libvpx/vp9/common/vp9_tile_common.h delete mode 100644 thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodemv.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodemv.h delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decoder.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decoder.h delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_detokenize.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_detokenize.h delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dthread.c delete mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dthread.h delete mode 100644 thirdparty/libvpx/vp9/vp9_dx_iface.c delete mode 100644 thirdparty/libvpx/vp9/vp9_dx_iface.h delete mode 100644 thirdparty/libvpx/vp9/vp9_iface_common.h delete mode 100644 thirdparty/libvpx/vp9_rtcd.h delete mode 100644 thirdparty/libvpx/vpx/internal/vpx_codec_internal.h delete mode 100644 thirdparty/libvpx/vpx/internal/vpx_psnr.h delete mode 100644 thirdparty/libvpx/vpx/src/vpx_codec.c delete mode 100644 thirdparty/libvpx/vpx/src/vpx_decoder.c delete mode 100644 thirdparty/libvpx/vpx/src/vpx_image.c delete mode 100644 thirdparty/libvpx/vpx/src/vpx_psnr.c delete mode 100644 thirdparty/libvpx/vpx/vp8.h delete mode 100644 thirdparty/libvpx/vpx/vp8dx.h delete mode 100644 thirdparty/libvpx/vpx/vpx_codec.h delete mode 100644 thirdparty/libvpx/vpx/vpx_decoder.h delete mode 100644 thirdparty/libvpx/vpx/vpx_encoder.h delete mode 100644 thirdparty/libvpx/vpx/vpx_frame_buffer.h delete mode 100644 thirdparty/libvpx/vpx/vpx_image.h delete mode 100644 thirdparty/libvpx/vpx/vpx_integer.h delete mode 100644 thirdparty/libvpx/vpx_config.asm delete mode 100644 thirdparty/libvpx/vpx_config.h delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c delete mode 100644 thirdparty/libvpx/vpx_dsp/bitreader.c delete mode 100644 thirdparty/libvpx/vpx_dsp/bitreader.h delete mode 100644 thirdparty/libvpx/vpx_dsp/bitreader_buffer.c delete mode 100644 thirdparty/libvpx/vpx_dsp/bitreader_buffer.h delete mode 100644 thirdparty/libvpx/vpx_dsp/intrapred.c delete mode 100644 thirdparty/libvpx/vpx_dsp/inv_txfm.c delete mode 100644 thirdparty/libvpx/vpx_dsp/inv_txfm.h delete mode 100644 thirdparty/libvpx/vpx_dsp/loopfilter.c delete mode 100644 thirdparty/libvpx/vpx_dsp/prob.c delete mode 100644 thirdparty/libvpx/vpx_dsp/prob.h delete mode 100644 thirdparty/libvpx/vpx_dsp/txfm_common.h delete mode 100644 thirdparty/libvpx/vpx_dsp/vpx_convolve.c delete mode 100644 thirdparty/libvpx/vpx_dsp/vpx_convolve.h delete mode 100644 thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h delete mode 100644 thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c delete mode 100644 thirdparty/libvpx/vpx_dsp/vpx_filter.h delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/convolve.h delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm delete mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm delete mode 100644 thirdparty/libvpx/vpx_dsp_rtcd.h delete mode 100644 thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h delete mode 100644 thirdparty/libvpx/vpx_mem/vpx_mem.c delete mode 100644 thirdparty/libvpx/vpx_mem/vpx_mem.h delete mode 100644 thirdparty/libvpx/vpx_ports/arm.h delete mode 100644 thirdparty/libvpx/vpx_ports/arm_cpudetect.c delete mode 100644 thirdparty/libvpx/vpx_ports/bitops.h delete mode 100644 thirdparty/libvpx/vpx_ports/config.h delete mode 100644 thirdparty/libvpx/vpx_ports/emmintrin_compat.h delete mode 100644 thirdparty/libvpx/vpx_ports/emms.asm delete mode 100644 thirdparty/libvpx/vpx_ports/mem.h delete mode 100644 thirdparty/libvpx/vpx_ports/mem_ops.h delete mode 100644 thirdparty/libvpx/vpx_ports/mem_ops_aligned.h delete mode 100644 thirdparty/libvpx/vpx_ports/msvc.h delete mode 100644 thirdparty/libvpx/vpx_ports/system_state.h delete mode 100644 thirdparty/libvpx/vpx_ports/vpx_once.h delete mode 100644 thirdparty/libvpx/vpx_ports/vpx_timer.h delete mode 100644 thirdparty/libvpx/vpx_ports/x86.h delete mode 100644 thirdparty/libvpx/vpx_ports/x86_abi_support.asm delete mode 100644 thirdparty/libvpx/vpx_scale/generic/yv12config.c delete mode 100644 thirdparty/libvpx/vpx_scale/generic/yv12extend.c delete mode 100644 thirdparty/libvpx/vpx_scale/vpx_scale.h delete mode 100644 thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c delete mode 100644 thirdparty/libvpx/vpx_scale/yv12config.h delete mode 100644 thirdparty/libvpx/vpx_scale_rtcd.h delete mode 100644 thirdparty/libvpx/vpx_util/endian_inl.h delete mode 100644 thirdparty/libvpx/vpx_util/vpx_thread.c delete mode 100644 thirdparty/libvpx/vpx_util/vpx_thread.h delete mode 100644 thirdparty/libvpx/vpx_version.h delete mode 100644 thirdparty/opus/COPYING delete mode 100644 thirdparty/opus/analysis.c delete mode 100644 thirdparty/opus/analysis.h delete mode 100644 thirdparty/opus/celt/_kiss_fft_guts.h delete mode 100644 thirdparty/opus/celt/arch.h delete mode 100644 thirdparty/opus/celt/arm/arm_celt_map.c delete mode 100644 thirdparty/opus/celt/arm/armcpu.c delete mode 100644 thirdparty/opus/celt/arm/armcpu.h delete mode 100644 thirdparty/opus/celt/arm/armopts.s.in delete mode 100644 thirdparty/opus/celt/arm/celt_ne10_fft.c delete mode 100644 thirdparty/opus/celt/arm/celt_ne10_mdct.c delete mode 100644 thirdparty/opus/celt/arm/celt_neon_intr.c delete mode 100644 thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S delete mode 100644 thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s delete mode 100644 thirdparty/opus/celt/arm/fft_arm.h delete mode 100644 thirdparty/opus/celt/arm/fixed_arm64.h delete mode 100644 thirdparty/opus/celt/arm/fixed_armv4.h delete mode 100644 thirdparty/opus/celt/arm/fixed_armv5e.h delete mode 100644 thirdparty/opus/celt/arm/kiss_fft_armv4.h delete mode 100644 thirdparty/opus/celt/arm/kiss_fft_armv5e.h delete mode 100644 thirdparty/opus/celt/arm/mdct_arm.h delete mode 100644 thirdparty/opus/celt/arm/pitch_arm.h delete mode 100644 thirdparty/opus/celt/bands.c delete mode 100644 thirdparty/opus/celt/bands.h delete mode 100644 thirdparty/opus/celt/celt.c delete mode 100644 thirdparty/opus/celt/celt.h delete mode 100644 thirdparty/opus/celt/celt_decoder.c delete mode 100644 thirdparty/opus/celt/celt_encoder.c delete mode 100644 thirdparty/opus/celt/celt_lpc.c delete mode 100644 thirdparty/opus/celt/celt_lpc.h delete mode 100644 thirdparty/opus/celt/cpu_support.h delete mode 100644 thirdparty/opus/celt/cwrs.c delete mode 100644 thirdparty/opus/celt/cwrs.h delete mode 100644 thirdparty/opus/celt/ecintrin.h delete mode 100644 thirdparty/opus/celt/entcode.c delete mode 100644 thirdparty/opus/celt/entcode.h delete mode 100644 thirdparty/opus/celt/entdec.c delete mode 100644 thirdparty/opus/celt/entdec.h delete mode 100644 thirdparty/opus/celt/entenc.c delete mode 100644 thirdparty/opus/celt/entenc.h delete mode 100644 thirdparty/opus/celt/fixed_debug.h delete mode 100644 thirdparty/opus/celt/fixed_generic.h delete mode 100644 thirdparty/opus/celt/float_cast.h delete mode 100644 thirdparty/opus/celt/kiss_fft.c delete mode 100644 thirdparty/opus/celt/kiss_fft.h delete mode 100644 thirdparty/opus/celt/laplace.c delete mode 100644 thirdparty/opus/celt/laplace.h delete mode 100644 thirdparty/opus/celt/mathops.c delete mode 100644 thirdparty/opus/celt/mathops.h delete mode 100644 thirdparty/opus/celt/mdct.c delete mode 100644 thirdparty/opus/celt/mdct.h delete mode 100644 thirdparty/opus/celt/mfrngcod.h delete mode 100644 thirdparty/opus/celt/mips/celt_mipsr1.h delete mode 100644 thirdparty/opus/celt/mips/fixed_generic_mipsr1.h delete mode 100644 thirdparty/opus/celt/mips/kiss_fft_mipsr1.h delete mode 100644 thirdparty/opus/celt/mips/mdct_mipsr1.h delete mode 100644 thirdparty/opus/celt/mips/pitch_mipsr1.h delete mode 100644 thirdparty/opus/celt/mips/vq_mipsr1.h delete mode 100644 thirdparty/opus/celt/modes.c delete mode 100644 thirdparty/opus/celt/modes.h delete mode 100644 thirdparty/opus/celt/opus_custom_demo.c delete mode 100644 thirdparty/opus/celt/os_support.h delete mode 100644 thirdparty/opus/celt/pitch.c delete mode 100644 thirdparty/opus/celt/pitch.h delete mode 100644 thirdparty/opus/celt/quant_bands.c delete mode 100644 thirdparty/opus/celt/quant_bands.h delete mode 100644 thirdparty/opus/celt/rate.c delete mode 100644 thirdparty/opus/celt/rate.h delete mode 100644 thirdparty/opus/celt/stack_alloc.h delete mode 100644 thirdparty/opus/celt/static_modes_fixed.h delete mode 100644 thirdparty/opus/celt/static_modes_fixed_arm_ne10.h delete mode 100644 thirdparty/opus/celt/static_modes_float.h delete mode 100644 thirdparty/opus/celt/static_modes_float_arm_ne10.h delete mode 100644 thirdparty/opus/celt/tests/test_unit_cwrs32.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_dft.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_entropy.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_laplace.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_mathops.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_mdct.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_rotation.c delete mode 100644 thirdparty/opus/celt/tests/test_unit_types.c delete mode 100644 thirdparty/opus/celt/vq.c delete mode 100644 thirdparty/opus/celt/vq.h delete mode 100644 thirdparty/opus/celt/x86/celt_lpc_sse.c delete mode 100644 thirdparty/opus/celt/x86/celt_lpc_sse.h delete mode 100644 thirdparty/opus/celt/x86/pitch_sse.c delete mode 100644 thirdparty/opus/celt/x86/pitch_sse.h delete mode 100644 thirdparty/opus/celt/x86/pitch_sse2.c delete mode 100644 thirdparty/opus/celt/x86/pitch_sse4_1.c delete mode 100644 thirdparty/opus/celt/x86/x86_celt_map.c delete mode 100644 thirdparty/opus/celt/x86/x86cpu.c delete mode 100644 thirdparty/opus/celt/x86/x86cpu.h delete mode 100644 thirdparty/opus/config.h delete mode 100644 thirdparty/opus/info.c delete mode 100644 thirdparty/opus/internal.c delete mode 100644 thirdparty/opus/internal.h delete mode 100644 thirdparty/opus/mlp.c delete mode 100644 thirdparty/opus/mlp.h delete mode 100644 thirdparty/opus/mlp_data.c delete mode 100644 thirdparty/opus/opus.c delete mode 100644 thirdparty/opus/opus/opus.h delete mode 100644 thirdparty/opus/opus/opus_custom.h delete mode 100644 thirdparty/opus/opus/opus_defines.h delete mode 100644 thirdparty/opus/opus/opus_multistream.h delete mode 100644 thirdparty/opus/opus/opus_types.h delete mode 100644 thirdparty/opus/opus/opusfile.h delete mode 100644 thirdparty/opus/opus_compare.c delete mode 100644 thirdparty/opus/opus_decoder.c delete mode 100644 thirdparty/opus/opus_encoder.c delete mode 100644 thirdparty/opus/opus_multistream.c delete mode 100644 thirdparty/opus/opus_multistream_decoder.c delete mode 100644 thirdparty/opus/opus_multistream_encoder.c delete mode 100644 thirdparty/opus/opus_private.h delete mode 100644 thirdparty/opus/opusfile.c delete mode 100644 thirdparty/opus/repacketizer.c delete mode 100644 thirdparty/opus/repacketizer_demo.c delete mode 100644 thirdparty/opus/silk/A2NLSF.c delete mode 100644 thirdparty/opus/silk/API.h delete mode 100644 thirdparty/opus/silk/CNG.c delete mode 100644 thirdparty/opus/silk/HP_variable_cutoff.c delete mode 100644 thirdparty/opus/silk/Inlines.h delete mode 100644 thirdparty/opus/silk/LPC_analysis_filter.c delete mode 100644 thirdparty/opus/silk/LPC_inv_pred_gain.c delete mode 100644 thirdparty/opus/silk/LP_variable_cutoff.c delete mode 100644 thirdparty/opus/silk/MacroCount.h delete mode 100644 thirdparty/opus/silk/MacroDebug.h delete mode 100644 thirdparty/opus/silk/NLSF2A.c delete mode 100644 thirdparty/opus/silk/NLSF_VQ.c delete mode 100644 thirdparty/opus/silk/NLSF_VQ_weights_laroia.c delete mode 100644 thirdparty/opus/silk/NLSF_decode.c delete mode 100644 thirdparty/opus/silk/NLSF_del_dec_quant.c delete mode 100644 thirdparty/opus/silk/NLSF_encode.c delete mode 100644 thirdparty/opus/silk/NLSF_stabilize.c delete mode 100644 thirdparty/opus/silk/NLSF_unpack.c delete mode 100644 thirdparty/opus/silk/NSQ.c delete mode 100644 thirdparty/opus/silk/NSQ.h delete mode 100644 thirdparty/opus/silk/NSQ_del_dec.c delete mode 100644 thirdparty/opus/silk/PLC.c delete mode 100644 thirdparty/opus/silk/PLC.h delete mode 100644 thirdparty/opus/silk/SigProc_FIX.h delete mode 100644 thirdparty/opus/silk/VAD.c delete mode 100644 thirdparty/opus/silk/VQ_WMat_EC.c delete mode 100644 thirdparty/opus/silk/ana_filt_bank_1.c delete mode 100644 thirdparty/opus/silk/arm/NSQ_neon.c delete mode 100644 thirdparty/opus/silk/arm/NSQ_neon.h delete mode 100644 thirdparty/opus/silk/arm/SigProc_FIX_armv4.h delete mode 100644 thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h delete mode 100644 thirdparty/opus/silk/arm/arm_silk_map.c delete mode 100644 thirdparty/opus/silk/arm/macros_arm64.h delete mode 100644 thirdparty/opus/silk/arm/macros_armv4.h delete mode 100644 thirdparty/opus/silk/arm/macros_armv5e.h delete mode 100644 thirdparty/opus/silk/biquad_alt.c delete mode 100644 thirdparty/opus/silk/bwexpander.c delete mode 100644 thirdparty/opus/silk/bwexpander_32.c delete mode 100644 thirdparty/opus/silk/check_control_input.c delete mode 100644 thirdparty/opus/silk/code_signs.c delete mode 100644 thirdparty/opus/silk/control.h delete mode 100644 thirdparty/opus/silk/control_SNR.c delete mode 100644 thirdparty/opus/silk/control_audio_bandwidth.c delete mode 100644 thirdparty/opus/silk/control_codec.c delete mode 100644 thirdparty/opus/silk/debug.c delete mode 100644 thirdparty/opus/silk/debug.h delete mode 100644 thirdparty/opus/silk/dec_API.c delete mode 100644 thirdparty/opus/silk/decode_core.c delete mode 100644 thirdparty/opus/silk/decode_frame.c delete mode 100644 thirdparty/opus/silk/decode_indices.c delete mode 100644 thirdparty/opus/silk/decode_parameters.c delete mode 100644 thirdparty/opus/silk/decode_pitch.c delete mode 100644 thirdparty/opus/silk/decode_pulses.c delete mode 100644 thirdparty/opus/silk/decoder_set_fs.c delete mode 100644 thirdparty/opus/silk/define.h delete mode 100644 thirdparty/opus/silk/enc_API.c delete mode 100644 thirdparty/opus/silk/encode_indices.c delete mode 100644 thirdparty/opus/silk/encode_pulses.c delete mode 100644 thirdparty/opus/silk/errors.h delete mode 100644 thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/apply_sine_window_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/autocorr_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/burg_modified_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/corrMatrix_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/encode_frame_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/find_LPC_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/find_LTP_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/k2a_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/k2a_Q16_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/main_FIX.h delete mode 100644 thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h delete mode 100644 thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h delete mode 100644 thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h delete mode 100644 thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/prefilter_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/process_gains_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/regularize_correlations_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/residual_energy16_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/residual_energy_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/schur64_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/schur_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/solve_LS_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/structs_FIX.h delete mode 100644 thirdparty/opus/silk/fixed/vector_ops_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c delete mode 100644 thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c delete mode 100644 thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c delete mode 100644 thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c delete mode 100644 thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c delete mode 100644 thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c delete mode 100644 thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c delete mode 100644 thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c delete mode 100644 thirdparty/opus/silk/float/SigProc_FLP.h delete mode 100644 thirdparty/opus/silk/float/apply_sine_window_FLP.c delete mode 100644 thirdparty/opus/silk/float/autocorrelation_FLP.c delete mode 100644 thirdparty/opus/silk/float/burg_modified_FLP.c delete mode 100644 thirdparty/opus/silk/float/bwexpander_FLP.c delete mode 100644 thirdparty/opus/silk/float/corrMatrix_FLP.c delete mode 100644 thirdparty/opus/silk/float/encode_frame_FLP.c delete mode 100644 thirdparty/opus/silk/float/energy_FLP.c delete mode 100644 thirdparty/opus/silk/float/find_LPC_FLP.c delete mode 100644 thirdparty/opus/silk/float/find_LTP_FLP.c delete mode 100644 thirdparty/opus/silk/float/find_pitch_lags_FLP.c delete mode 100644 thirdparty/opus/silk/float/find_pred_coefs_FLP.c delete mode 100644 thirdparty/opus/silk/float/inner_product_FLP.c delete mode 100644 thirdparty/opus/silk/float/k2a_FLP.c delete mode 100644 thirdparty/opus/silk/float/levinsondurbin_FLP.c delete mode 100644 thirdparty/opus/silk/float/main_FLP.h delete mode 100644 thirdparty/opus/silk/float/noise_shape_analysis_FLP.c delete mode 100644 thirdparty/opus/silk/float/pitch_analysis_core_FLP.c delete mode 100644 thirdparty/opus/silk/float/prefilter_FLP.c delete mode 100644 thirdparty/opus/silk/float/process_gains_FLP.c delete mode 100644 thirdparty/opus/silk/float/regularize_correlations_FLP.c delete mode 100644 thirdparty/opus/silk/float/residual_energy_FLP.c delete mode 100644 thirdparty/opus/silk/float/scale_copy_vector_FLP.c delete mode 100644 thirdparty/opus/silk/float/scale_vector_FLP.c delete mode 100644 thirdparty/opus/silk/float/schur_FLP.c delete mode 100644 thirdparty/opus/silk/float/solve_LS_FLP.c delete mode 100644 thirdparty/opus/silk/float/sort_FLP.c delete mode 100644 thirdparty/opus/silk/float/structs_FLP.h delete mode 100644 thirdparty/opus/silk/float/warped_autocorrelation_FLP.c delete mode 100644 thirdparty/opus/silk/float/wrappers_FLP.c delete mode 100644 thirdparty/opus/silk/gain_quant.c delete mode 100644 thirdparty/opus/silk/init_decoder.c delete mode 100644 thirdparty/opus/silk/init_encoder.c delete mode 100644 thirdparty/opus/silk/inner_prod_aligned.c delete mode 100644 thirdparty/opus/silk/interpolate.c delete mode 100644 thirdparty/opus/silk/lin2log.c delete mode 100644 thirdparty/opus/silk/log2lin.c delete mode 100644 thirdparty/opus/silk/macros.h delete mode 100644 thirdparty/opus/silk/main.h delete mode 100644 thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h delete mode 100644 thirdparty/opus/silk/mips/macros_mipsr1.h delete mode 100644 thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h delete mode 100644 thirdparty/opus/silk/pitch_est_defines.h delete mode 100644 thirdparty/opus/silk/pitch_est_tables.c delete mode 100644 thirdparty/opus/silk/process_NLSFs.c delete mode 100644 thirdparty/opus/silk/quant_LTP_gains.c delete mode 100644 thirdparty/opus/silk/resampler.c delete mode 100644 thirdparty/opus/silk/resampler_down2.c delete mode 100644 thirdparty/opus/silk/resampler_down2_3.c delete mode 100644 thirdparty/opus/silk/resampler_private.h delete mode 100644 thirdparty/opus/silk/resampler_private_AR2.c delete mode 100644 thirdparty/opus/silk/resampler_private_IIR_FIR.c delete mode 100644 thirdparty/opus/silk/resampler_private_down_FIR.c delete mode 100644 thirdparty/opus/silk/resampler_private_up2_HQ.c delete mode 100644 thirdparty/opus/silk/resampler_rom.c delete mode 100644 thirdparty/opus/silk/resampler_rom.h delete mode 100644 thirdparty/opus/silk/resampler_structs.h delete mode 100644 thirdparty/opus/silk/shell_coder.c delete mode 100644 thirdparty/opus/silk/sigm_Q15.c delete mode 100644 thirdparty/opus/silk/sort.c delete mode 100644 thirdparty/opus/silk/stereo_LR_to_MS.c delete mode 100644 thirdparty/opus/silk/stereo_MS_to_LR.c delete mode 100644 thirdparty/opus/silk/stereo_decode_pred.c delete mode 100644 thirdparty/opus/silk/stereo_encode_pred.c delete mode 100644 thirdparty/opus/silk/stereo_find_predictor.c delete mode 100644 thirdparty/opus/silk/stereo_quant_pred.c delete mode 100644 thirdparty/opus/silk/structs.h delete mode 100644 thirdparty/opus/silk/sum_sqr_shift.c delete mode 100644 thirdparty/opus/silk/table_LSF_cos.c delete mode 100644 thirdparty/opus/silk/tables.h delete mode 100644 thirdparty/opus/silk/tables_LTP.c delete mode 100644 thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c delete mode 100644 thirdparty/opus/silk/tables_NLSF_CB_WB.c delete mode 100644 thirdparty/opus/silk/tables_gain.c delete mode 100644 thirdparty/opus/silk/tables_other.c delete mode 100644 thirdparty/opus/silk/tables_pitch_lag.c delete mode 100644 thirdparty/opus/silk/tables_pulses_per_block.c delete mode 100644 thirdparty/opus/silk/tuning_parameters.h delete mode 100644 thirdparty/opus/silk/typedef.h delete mode 100644 thirdparty/opus/silk/x86/NSQ_del_dec_sse.c delete mode 100644 thirdparty/opus/silk/x86/NSQ_sse.c delete mode 100644 thirdparty/opus/silk/x86/SigProc_FIX_sse.h delete mode 100644 thirdparty/opus/silk/x86/VAD_sse.c delete mode 100644 thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c delete mode 100644 thirdparty/opus/silk/x86/main_sse.h delete mode 100644 thirdparty/opus/silk/x86/x86_silk_map.c delete mode 100644 thirdparty/opus/stream.c delete mode 100644 thirdparty/opus/tansig_table.h diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 8ad6997463..ca7f065bd6 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -235,16 +235,6 @@ Copyright: 1995-2019, The PNG Reference Library Authors. 1995-1996, Guy Eric Schalnat, Group 42, Inc. License: Zlib -Files: ./thirdparty/libsimplewebm/ -Comment: libsimplewebm -Copyright: 2016, Błażej Szczygieł -License: Expat - -Files: ./thirdparty/libsimplewebm/libwebm/ -Comment: The WebM Project -Copyright: 2010, Google Inc. -License: BSD-3-clause - Files: ./thirdparty/libtheora/ Comment: OggTheora Copyright: 2002-2009, Xiph.org Foundation @@ -255,17 +245,6 @@ Comment: OggVorbis Copyright: 2002-2015, Xiph.org Foundation License: BSD-3-clause -Files: ./thirdparty/libvpx/ -Comment: The WebM Project -Copyright: 2010, The WebM Project authors. -License: BSD-3-clause - -Files: ./thirdparty/libvpx/third_party/android/cpu-features.c - ./thirdparty/libvpx/third_party/android/cpu-features.h -Comment: The Android Open Source Project -Copyright: 2010, The Android Open Source Project -License: BSD-2-clause - Files: ./thirdparty/libwebp/ Comment: WebP codec Copyright: 2010, Google Inc. @@ -388,14 +367,6 @@ Comment: Intel Open Image Denoise Copyright: 2009-2019, Intel Corporation License: Apache-2.0 -Files: ./thirdparty/opus/ -Comment: Opus -Copyright: 2001-2011, Xiph.Org, Skype Limited, Octasic, - Jean-Marc Valin, Timothy B. Terriberry, - CSIRO, Gregory Maxwell, Mark Borgerding, - Erik de Castro Lopo -License: BSD-3-clause - Files: ./thirdparty/pcre2/ Comment: PCRE2 Copyright: 1997-2020, University of Cambridge diff --git a/SConstruct b/SConstruct index ed57a66cb7..9e3930f756 100644 --- a/SConstruct +++ b/SConstruct @@ -166,12 +166,10 @@ opts.Add(BoolVariable("builtin_libogg", "Use the built-in libogg library", True) opts.Add(BoolVariable("builtin_libpng", "Use the built-in libpng library", True)) opts.Add(BoolVariable("builtin_libtheora", "Use the built-in libtheora library", True)) opts.Add(BoolVariable("builtin_libvorbis", "Use the built-in libvorbis library", True)) -opts.Add(BoolVariable("builtin_libvpx", "Use the built-in libvpx library", True)) opts.Add(BoolVariable("builtin_libwebp", "Use the built-in libwebp library", True)) opts.Add(BoolVariable("builtin_wslay", "Use the built-in wslay library", True)) opts.Add(BoolVariable("builtin_mbedtls", "Use the built-in mbedTLS library", True)) opts.Add(BoolVariable("builtin_miniupnpc", "Use the built-in miniupnpc library", True)) -opts.Add(BoolVariable("builtin_opus", "Use the built-in Opus library", True)) opts.Add(BoolVariable("builtin_pcre2", "Use the built-in PCRE2 library", True)) opts.Add(BoolVariable("builtin_pcre2_with_jit", "Use JIT compiler for the built-in PCRE2 library", True)) opts.Add(BoolVariable("builtin_recast", "Use the built-in Recast library", True)) diff --git a/doc/classes/VideoPlayer.xml b/doc/classes/VideoPlayer.xml index 4f60b9d567..c8590988f5 100644 --- a/doc/classes/VideoPlayer.xml +++ b/doc/classes/VideoPlayer.xml @@ -5,9 +5,9 @@ Control node for playing video streams using [VideoStream] resources. - Supported video formats are [url=https://www.webmproject.org/]WebM[/url] ([code].webm[/code], [VideoStreamWebm]), [url=https://www.theora.org/]Ogg Theora[/url] ([code].ogv[/code], [VideoStreamTheora]), and any format exposed via a GDNative plugin using [VideoStreamGDNative]. + Supported video formats are [url=https://www.theora.org/]Ogg Theora[/url] ([code].ogv[/code], [VideoStreamTheora]) and any format exposed via a GDNative plugin using [VideoStreamGDNative]. [b]Note:[/b] Due to a bug, VideoPlayer does not support localization remapping yet. - [b]Warning:[/b] On HTML5, video playback [i]will[/i] perform poorly due to missing architecture-specific assembly optimizations, especially for VP8/VP9. + [b]Warning:[/b] On HTML5, video playback [i]will[/i] perform poorly due to missing architecture-specific assembly optimizations. diff --git a/modules/ogg/SCsub b/modules/ogg/SCsub index e415d92498..f15525648f 100644 --- a/modules/ogg/SCsub +++ b/modules/ogg/SCsub @@ -3,9 +3,6 @@ Import("env") Import("env_modules") -# Only kept to build the thirdparty library used by the theora and webm -# modules. - env_ogg = env_modules.Clone() # Thirdparty source files diff --git a/modules/opus/SCsub b/modules/opus/SCsub deleted file mode 100644 index 1437cd86df..0000000000 --- a/modules/opus/SCsub +++ /dev/null @@ -1,252 +0,0 @@ -#!/usr/bin/env python - -Import("env") -Import("env_modules") - -# Only kept to build the thirdparty library used by the webm module. -# AudioStreamOpus was dropped in 3.0 due to incompatibility with the new audio -# engine. If you want to port it, fetch it from the Git history. - -env_opus = env_modules.Clone() - -# Thirdparty source files - -thirdparty_obj = [] - -# Thirdparty source files -if env["builtin_opus"]: - thirdparty_dir = "#thirdparty/opus/" - - thirdparty_sources = [ - # Sync with opus_sources.mk - "opus.c", - "opus_decoder.c", - "opus_encoder.c", - "opus_multistream.c", - "opus_multistream_encoder.c", - "opus_multistream_decoder.c", - "repacketizer.c", - "analysis.c", - "mlp.c", - "mlp_data.c", - # Sync with libopusfile Makefile.am - "info.c", - "internal.c", - "opusfile.c", - "stream.c", - # Sync with celt_sources.mk - "celt/bands.c", - "celt/celt.c", - "celt/celt_encoder.c", - "celt/celt_decoder.c", - "celt/cwrs.c", - "celt/entcode.c", - "celt/entdec.c", - "celt/entenc.c", - "celt/kiss_fft.c", - "celt/laplace.c", - "celt/mathops.c", - "celt/mdct.c", - "celt/modes.c", - "celt/pitch.c", - "celt/celt_lpc.c", - "celt/quant_bands.c", - "celt/rate.c", - "celt/vq.c", - # "celt/arm/arm_celt_map.c", - # "celt/arm/armcpu.c", - # "celt/arm/celt_ne10_fft.c", - # "celt/arm/celt_ne10_mdct.c", - # "celt/arm/celt_neon_intr.c", - # Sync with silk_sources.mk - "silk/CNG.c", - "silk/code_signs.c", - "silk/init_decoder.c", - "silk/decode_core.c", - "silk/decode_frame.c", - "silk/decode_parameters.c", - "silk/decode_indices.c", - "silk/decode_pulses.c", - "silk/decoder_set_fs.c", - "silk/dec_API.c", - "silk/enc_API.c", - "silk/encode_indices.c", - "silk/encode_pulses.c", - "silk/gain_quant.c", - "silk/interpolate.c", - "silk/LP_variable_cutoff.c", - "silk/NLSF_decode.c", - "silk/NSQ.c", - "silk/NSQ_del_dec.c", - "silk/PLC.c", - "silk/shell_coder.c", - "silk/tables_gain.c", - "silk/tables_LTP.c", - "silk/tables_NLSF_CB_NB_MB.c", - "silk/tables_NLSF_CB_WB.c", - "silk/tables_other.c", - "silk/tables_pitch_lag.c", - "silk/tables_pulses_per_block.c", - "silk/VAD.c", - "silk/control_audio_bandwidth.c", - "silk/quant_LTP_gains.c", - "silk/VQ_WMat_EC.c", - "silk/HP_variable_cutoff.c", - "silk/NLSF_encode.c", - "silk/NLSF_VQ.c", - "silk/NLSF_unpack.c", - "silk/NLSF_del_dec_quant.c", - "silk/process_NLSFs.c", - "silk/stereo_LR_to_MS.c", - "silk/stereo_MS_to_LR.c", - "silk/check_control_input.c", - "silk/control_SNR.c", - "silk/init_encoder.c", - "silk/control_codec.c", - "silk/A2NLSF.c", - "silk/ana_filt_bank_1.c", - "silk/biquad_alt.c", - "silk/bwexpander_32.c", - "silk/bwexpander.c", - "silk/debug.c", - "silk/decode_pitch.c", - "silk/inner_prod_aligned.c", - "silk/lin2log.c", - "silk/log2lin.c", - "silk/LPC_analysis_filter.c", - "silk/LPC_inv_pred_gain.c", - "silk/table_LSF_cos.c", - "silk/NLSF2A.c", - "silk/NLSF_stabilize.c", - "silk/NLSF_VQ_weights_laroia.c", - "silk/pitch_est_tables.c", - "silk/resampler.c", - "silk/resampler_down2_3.c", - "silk/resampler_down2.c", - "silk/resampler_private_AR2.c", - "silk/resampler_private_down_FIR.c", - "silk/resampler_private_IIR_FIR.c", - "silk/resampler_private_up2_HQ.c", - "silk/resampler_rom.c", - "silk/sigm_Q15.c", - "silk/sort.c", - "silk/sum_sqr_shift.c", - "silk/stereo_decode_pred.c", - "silk/stereo_encode_pred.c", - "silk/stereo_find_predictor.c", - "silk/stereo_quant_pred.c", - ] - - opus_sources_silk = [] - - if env["platform"] in ["android", "iphone", "javascript"]: - env_opus.Append(CPPDEFINES=["FIXED_POINT"]) - opus_sources_silk = [ - "silk/fixed/LTP_analysis_filter_FIX.c", - "silk/fixed/LTP_scale_ctrl_FIX.c", - "silk/fixed/corrMatrix_FIX.c", - "silk/fixed/encode_frame_FIX.c", - "silk/fixed/find_LPC_FIX.c", - "silk/fixed/find_LTP_FIX.c", - "silk/fixed/find_pitch_lags_FIX.c", - "silk/fixed/find_pred_coefs_FIX.c", - "silk/fixed/noise_shape_analysis_FIX.c", - "silk/fixed/prefilter_FIX.c", - "silk/fixed/process_gains_FIX.c", - "silk/fixed/regularize_correlations_FIX.c", - "silk/fixed/residual_energy16_FIX.c", - "silk/fixed/residual_energy_FIX.c", - "silk/fixed/solve_LS_FIX.c", - "silk/fixed/warped_autocorrelation_FIX.c", - "silk/fixed/apply_sine_window_FIX.c", - "silk/fixed/autocorr_FIX.c", - "silk/fixed/burg_modified_FIX.c", - "silk/fixed/k2a_FIX.c", - "silk/fixed/k2a_Q16_FIX.c", - "silk/fixed/pitch_analysis_core_FIX.c", - "silk/fixed/vector_ops_FIX.c", - "silk/fixed/schur64_FIX.c", - "silk/fixed/schur_FIX.c", - ] - else: - opus_sources_silk = [ - "silk/float/apply_sine_window_FLP.c", - "silk/float/corrMatrix_FLP.c", - "silk/float/encode_frame_FLP.c", - "silk/float/find_LPC_FLP.c", - "silk/float/find_LTP_FLP.c", - "silk/float/find_pitch_lags_FLP.c", - "silk/float/find_pred_coefs_FLP.c", - "silk/float/LPC_analysis_filter_FLP.c", - "silk/float/LTP_analysis_filter_FLP.c", - "silk/float/LTP_scale_ctrl_FLP.c", - "silk/float/noise_shape_analysis_FLP.c", - "silk/float/prefilter_FLP.c", - "silk/float/process_gains_FLP.c", - "silk/float/regularize_correlations_FLP.c", - "silk/float/residual_energy_FLP.c", - "silk/float/solve_LS_FLP.c", - "silk/float/warped_autocorrelation_FLP.c", - "silk/float/wrappers_FLP.c", - "silk/float/autocorrelation_FLP.c", - "silk/float/burg_modified_FLP.c", - "silk/float/bwexpander_FLP.c", - "silk/float/energy_FLP.c", - "silk/float/inner_product_FLP.c", - "silk/float/k2a_FLP.c", - "silk/float/levinsondurbin_FLP.c", - "silk/float/LPC_inv_pred_gain_FLP.c", - "silk/float/pitch_analysis_core_FLP.c", - "silk/float/scale_copy_vector_FLP.c", - "silk/float/scale_vector_FLP.c", - "silk/float/schur_FLP.c", - "silk/float/sort_FLP.c", - ] - - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources + opus_sources_silk] - - # also requires libogg - if env["builtin_libogg"]: - env_opus.Prepend(CPPPATH=["#thirdparty/libogg"]) - - env_opus.Append(CPPDEFINES=["HAVE_CONFIG_H"]) - - thirdparty_include_paths = [ - "", - "celt", - "opus", - "silk", - "silk/fixed", - "silk/float", - ] - env_opus.Prepend(CPPPATH=[thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) - - if env["platform"] == "android": - if "android_arch" in env and env["android_arch"] == "armv7": - env_opus.Append(CPPDEFINES=["OPUS_ARM_OPT"]) - elif "android_arch" in env and env["android_arch"] == "arm64v8": - env_opus.Append(CPPDEFINES=["OPUS_ARM64_OPT"]) - elif env["platform"] == "iphone": - if "arch" in env and env["arch"] == "arm": - env_opus.Append(CPPDEFINES=["OPUS_ARM_OPT"]) - elif "arch" in env and env["arch"] == "arm64": - env_opus.Append(CPPDEFINES=["OPUS_ARM64_OPT"]) - elif env["platform"] == "osx": - if "arch" in env and env["arch"] == "arm64": - env_opus.Append(CPPDEFINES=["OPUS_ARM64_OPT"]) - - env_thirdparty = env_opus.Clone() - env_thirdparty.disable_warnings() - env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) - env.modules_sources += thirdparty_obj - - -# Godot source files - -module_obj = [] - -env_opus.add_source_files(module_obj, "*.cpp") -env.modules_sources += module_obj - -# Needed to force rebuilding the module files when the thirdparty library is updated. -env.Depends(module_obj, thirdparty_obj) diff --git a/modules/opus/config.py b/modules/opus/config.py deleted file mode 100644 index 9ff7b2dece..0000000000 --- a/modules/opus/config.py +++ /dev/null @@ -1,6 +0,0 @@ -def can_build(env, platform): - return env.module_check_dependencies("opus", ["ogg"]) - - -def configure(env): - pass diff --git a/modules/opus/register_types.cpp b/modules/opus/register_types.cpp deleted file mode 100644 index 02874a9a4b..0000000000 --- a/modules/opus/register_types.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/*************************************************************************/ -/* register_types.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "register_types.h" - -// Dummy module as libvorbis is needed by other modules (theora ...) - -void register_opus_types() {} - -void unregister_opus_types() {} diff --git a/modules/opus/register_types.h b/modules/opus/register_types.h deleted file mode 100644 index af889cf809..0000000000 --- a/modules/opus/register_types.h +++ /dev/null @@ -1,37 +0,0 @@ -/*************************************************************************/ -/* register_types.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef OPUS_REGISTER_TYPES_H -#define OPUS_REGISTER_TYPES_H - -void register_opus_types(); -void unregister_opus_types(); - -#endif // OPUS_REGISTER_TYPES_H diff --git a/modules/theora/doc_classes/VideoStreamTheora.xml b/modules/theora/doc_classes/VideoStreamTheora.xml index 2dfcd27dff..725f87b046 100644 --- a/modules/theora/doc_classes/VideoStreamTheora.xml +++ b/modules/theora/doc_classes/VideoStreamTheora.xml @@ -4,7 +4,7 @@ [VideoStream] resource for Ogg Theora videos. - [VideoStream] resource handling the [url=https://www.theora.org/]Ogg Theora[/url] video format with [code].ogv[/code] extension. The Theora codec is less efficient than [VideoStreamWebm]'s VP8 and VP9, but it requires less CPU resources to decode. The Theora codec is decoded on the CPU. + [VideoStream] resource handling the [url=https://www.theora.org/]Ogg Theora[/url] video format with [code].ogv[/code] extension. The Theora codec is decoded on the CPU. [b]Note:[/b] While Ogg Theora videos can also have an [code].ogg[/code] extension, you will have to rename the extension to [code].ogv[/code] to use those videos within Godot. diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp index 8e80dfffca..ef434f107e 100644 --- a/modules/theora/video_stream_theora.cpp +++ b/modules/theora/video_stream_theora.cpp @@ -603,7 +603,7 @@ float VideoStreamPlaybackTheora::get_playback_position() const { }; void VideoStreamPlaybackTheora::seek(float p_time) { - WARN_PRINT_ONCE("Seeking in Theora and WebM videos is not implemented yet (it's only supported for GDNative-provided video streams)."); + WARN_PRINT_ONCE("Seeking in Theora videos is not implemented yet (it's only supported for GDNative-provided video streams)."); } void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback, void *p_userdata) { diff --git a/modules/webm/SCsub b/modules/webm/SCsub deleted file mode 100644 index 44e80e2870..0000000000 --- a/modules/webm/SCsub +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -Import("env") -Import("env_modules") - -env_webm = env_modules.Clone() - -# Thirdparty source files - -thirdparty_obj = [] - -thirdparty_dir = "#thirdparty/libsimplewebm/" -thirdparty_sources = [ - "libwebm/mkvparser/mkvparser.cc", - "OpusVorbisDecoder.cpp", - "VPXDecoder.cpp", - "WebMDemuxer.cpp", -] -thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - -env_webm.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "libwebm/"]) - -# also requires libogg, libvorbis and libopus -if env["builtin_libogg"]: - env_webm.Prepend(CPPPATH=["#thirdparty/libogg"]) -if env["builtin_libvorbis"]: - env_webm.Prepend(CPPPATH=["#thirdparty/libvorbis"]) -if env["builtin_opus"]: - env_webm.Prepend(CPPPATH=["#thirdparty/opus"]) - -if env["builtin_libvpx"]: - env_webm.Prepend(CPPPATH=["#thirdparty/libvpx"]) - SConscript("libvpx/SCsub") - -env_thirdparty = env_webm.Clone() -env_thirdparty.disable_warnings() -env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) -env.modules_sources += thirdparty_obj - -# Godot source files - -module_obj = [] - -env_webm.add_source_files(module_obj, "*.cpp") -env.modules_sources += module_obj - -# Needed to force rebuilding the module files when the thirdparty library is updated. -env.Depends(module_obj, thirdparty_obj) diff --git a/modules/webm/config.py b/modules/webm/config.py deleted file mode 100644 index 99f8ace114..0000000000 --- a/modules/webm/config.py +++ /dev/null @@ -1,19 +0,0 @@ -def can_build(env, platform): - if platform in ["iphone"]: - return False - - return env.module_check_dependencies("webm", ["ogg", "opus", "vorbis"]) - - -def configure(env): - pass - - -def get_doc_classes(): - return [ - "VideoStreamWebm", - ] - - -def get_doc_path(): - return "doc_classes" diff --git a/modules/webm/doc_classes/VideoStreamWebm.xml b/modules/webm/doc_classes/VideoStreamWebm.xml deleted file mode 100644 index e04d02d6ab..0000000000 --- a/modules/webm/doc_classes/VideoStreamWebm.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - [VideoStream] resource for WebM videos. - - - [VideoStream] resource handling the [url=https://www.webmproject.org/]WebM[/url] video format with [code].webm[/code] extension. Both the VP8 and VP9 codecs are supported. The VP8 and VP9 codecs are more efficient than [VideoStreamTheora], but they require more CPU resources to decode (especially VP9). Both the VP8 and VP9 codecs are decoded on the CPU. - [b]Note:[/b] Alpha channel (also known as transparency) is not supported. The video will always appear to have a black background, even if it originally contains an alpha channel. - [b]Note:[/b] There are known bugs and performance issues with WebM video playback in Godot. If you run into problems, try using the Ogg Theora format instead: [VideoStreamTheora] - - - - - - - - Returns the WebM video file handled by this [VideoStreamWebm]. - - - - - - - Sets the WebM video file that this [VideoStreamWebm] resource handles. The [code]file[/code] name should have the [code].webm[/code] extension. - - - - diff --git a/modules/webm/libvpx/SCsub b/modules/webm/libvpx/SCsub deleted file mode 100644 index 4334cf732b..0000000000 --- a/modules/webm/libvpx/SCsub +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/env python - -Import("env") -Import("env_modules") - -# Thirdparty sources - -libvpx_dir = "#thirdparty/libvpx/" - -libvpx_sources = [ - "vp8/vp8_dx_iface.c", - "vp8/common/generic/systemdependent.c", - "vp8/common/alloccommon.c", - "vp8/common/blockd.c", - "vp8/common/copy_c.c", - "vp8/common/debugmodes.c", - "vp8/common/dequantize.c", - "vp8/common/entropy.c", - "vp8/common/entropymode.c", - "vp8/common/entropymv.c", - "vp8/common/extend.c", - "vp8/common/filter.c", - "vp8/common/findnearmv.c", - "vp8/common/idct_blk.c", - "vp8/common/idctllm.c", - "vp8/common/loopfilter_filters.c", - "vp8/common/mbpitch.c", - "vp8/common/modecont.c", - "vp8/common/quant_common.c", - "vp8/common/reconinter.c", - "vp8/common/reconintra.c", - "vp8/common/reconintra4x4.c", - "vp8/common/rtcd.c", - "vp8/common/setupintrarecon.c", - "vp8/common/swapyv12buffer.c", - "vp8/common/treecoder.c", - "vp8/common/vp8_loopfilter.c", - "vp8/decoder/dboolhuff.c", - "vp8/decoder/decodeframe.c", - "vp8/decoder/decodemv.c", - "vp8/decoder/detokenize.c", - "vp8/decoder/onyxd_if.c", - "vp9/vp9_dx_iface.c", - "vp9/common/vp9_alloccommon.c", - "vp9/common/vp9_blockd.c", - "vp9/common/vp9_common_data.c", - "vp9/common/vp9_debugmodes.c", - "vp9/common/vp9_entropy.c", - "vp9/common/vp9_entropymode.c", - "vp9/common/vp9_entropymv.c", - "vp9/common/vp9_filter.c", - "vp9/common/vp9_frame_buffers.c", - "vp9/common/vp9_idct.c", - "vp9/common/vp9_loopfilter.c", - "vp9/common/vp9_mvref_common.c", - "vp9/common/vp9_pred_common.c", - "vp9/common/vp9_quant_common.c", - "vp9/common/vp9_reconinter.c", - "vp9/common/vp9_reconintra.c", - "vp9/common/vp9_rtcd.c", - "vp9/common/vp9_scale.c", - "vp9/common/vp9_scan.c", - "vp9/common/vp9_seg_common.c", - "vp9/common/vp9_thread_common.c", - "vp9/common/vp9_tile_common.c", - "vp9/decoder/vp9_decodeframe.c", - "vp9/decoder/vp9_decodemv.c", - "vp9/decoder/vp9_decoder.c", - "vp9/decoder/vp9_detokenize.c", - "vp9/decoder/vp9_dsubexp.c", - "vp9/decoder/vp9_dthread.c", - "vpx/src/vpx_codec.c", - "vpx/src/vpx_decoder.c", - "vpx/src/vpx_image.c", - "vpx/src/vpx_psnr.c", - "vpx_dsp/bitreader.c", - "vpx_dsp/bitreader_buffer.c", - "vpx_dsp/intrapred.c", - "vpx_dsp/inv_txfm.c", - "vpx_dsp/loopfilter.c", - "vpx_dsp/prob.c", - "vpx_dsp/vpx_convolve.c", - "vpx_dsp/vpx_dsp_rtcd.c", - "vpx_mem/vpx_mem.c", - "vpx_scale/vpx_scale_rtcd.c", - "vpx_scale/generic/yv12config.c", - "vpx_scale/generic/yv12extend.c", - "vpx_util/vpx_thread.c", -] - -libvpx_sources_mt = [ - "vp8/decoder/threading.c", -] - -libvpx_sources_intrin_x86 = [ - "vp8/common/x86/filter_x86.c", - "vp8/common/x86/loopfilter_x86.c", - "vp8/common/x86/vp8_asm_stubs.c", - "vpx_dsp/x86/vpx_asm_stubs.c", -] -libvpx_sources_intrin_x86_mmx = [ - "vp8/common/x86/idct_blk_mmx.c", -] -libvpx_sources_intrin_x86_sse2 = [ - "vp8/common/x86/idct_blk_sse2.c", - "vp9/common/x86/vp9_idct_intrin_sse2.c", - "vpx_dsp/x86/inv_txfm_sse2.c", - "vpx_dsp/x86/loopfilter_sse2.c", -] -libvpx_sources_intrin_x86_ssse3 = [ - "vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c", -] -libvpx_sources_intrin_x86_avx2 = [ - "vpx_dsp/x86/loopfilter_avx2.c", - "vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c", -] -libvpx_sources_x86asm = [ - "vp8/common/x86/copy_sse2.asm", - "vp8/common/x86/copy_sse3.asm", - "vp8/common/x86/dequantize_mmx.asm", - "vp8/common/x86/idctllm_mmx.asm", - "vp8/common/x86/idctllm_sse2.asm", - "vp8/common/x86/iwalsh_mmx.asm", - "vp8/common/x86/iwalsh_sse2.asm", - "vp8/common/x86/loopfilter_sse2.asm", - "vp8/common/x86/recon_mmx.asm", - "vp8/common/x86/recon_sse2.asm", - "vp8/common/x86/subpixel_mmx.asm", - "vp8/common/x86/subpixel_sse2.asm", - "vp8/common/x86/subpixel_ssse3.asm", - "vp8/common/x86/vp8_loopfilter_mmx.asm", - "vpx_dsp/x86/intrapred_sse2.asm", - "vpx_dsp/x86/intrapred_ssse3.asm", - "vpx_dsp/x86/inv_wht_sse2.asm", - "vpx_dsp/x86/vpx_convolve_copy_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_8t_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm", - "vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm", - "vpx_ports/emms.asm", -] -libvpx_sources_x86_64asm = [ - "vp8/common/x86/loopfilter_block_sse2_x86_64.asm", - "vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm", -] - -libvpx_sources_arm = [ - "vpx_ports/arm_cpudetect.c", - "vp8/common/arm/loopfilter_arm.c", -] -libvpx_sources_arm_neon = [ - "vp8/common/arm/neon/bilinearpredict_neon.c", - "vp8/common/arm/neon/copymem_neon.c", - "vp8/common/arm/neon/dc_only_idct_add_neon.c", - "vp8/common/arm/neon/dequant_idct_neon.c", - "vp8/common/arm/neon/dequantizeb_neon.c", - "vp8/common/arm/neon/idct_blk_neon.c", - "vp8/common/arm/neon/idct_dequant_0_2x_neon.c", - "vp8/common/arm/neon/idct_dequant_full_2x_neon.c", - "vp8/common/arm/neon/iwalsh_neon.c", - "vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c", - "vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c", - "vp8/common/arm/neon/mbloopfilter_neon.c", - "vp8/common/arm/neon/shortidct4x4llm_neon.c", - "vp8/common/arm/neon/sixtappredict_neon.c", - "vp8/common/arm/neon/vp8_loopfilter_neon.c", - "vp9/common/arm/neon/vp9_iht4x4_add_neon.c", - "vp9/common/arm/neon/vp9_iht8x8_add_neon.c", - "vpx_dsp/arm/idct16x16_1_add_neon.c", - "vpx_dsp/arm/idct16x16_add_neon.c", - "vpx_dsp/arm/idct16x16_neon.c", - "vpx_dsp/arm/idct32x32_1_add_neon.c", - "vpx_dsp/arm/idct32x32_add_neon.c", - "vpx_dsp/arm/idct4x4_1_add_neon.c", - "vpx_dsp/arm/idct4x4_add_neon.c", - "vpx_dsp/arm/idct8x8_1_add_neon.c", - "vpx_dsp/arm/idct8x8_add_neon.c", - "vpx_dsp/arm/intrapred_neon.c", - "vpx_dsp/arm/loopfilter_16_neon.c", - "vpx_dsp/arm/loopfilter_4_neon.c", - "vpx_dsp/arm/loopfilter_8_neon.c", - "vpx_dsp/arm/loopfilter_neon.c", - "vpx_dsp/arm/vpx_convolve8_avg_neon.c", - "vpx_dsp/arm/vpx_convolve8_neon.c", - "vpx_dsp/arm/vpx_convolve_avg_neon.c", - "vpx_dsp/arm/vpx_convolve_copy_neon.c", - "vpx_dsp/arm/vpx_convolve_neon.c", -] -libvpx_sources_arm_neon_gas = [ - "vpx_dsp/arm/gas/intrapred_neon_asm.s", - "vpx_dsp/arm/gas/loopfilter_mb_neon.s", - "vpx_dsp/arm/gas/save_reg_neon.s", -] -libvpx_sources_arm_neon_armasm_ms = [ - "vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm", - "vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm", - "vpx_dsp/arm/armasm_ms/save_reg_neon.asm", -] -libvpx_sources_arm_neon_gas_apple = [ - "vpx_dsp/arm/gas_apple/intrapred_neon_asm.s", - "vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s", - "vpx_dsp/arm/gas_apple/save_reg_neon.s", -] - -libvpx_sources = [libvpx_dir + file for file in libvpx_sources] -libvpx_sources_mt = [libvpx_dir + file for file in libvpx_sources_mt] -libvpx_sources_intrin_x86 = [libvpx_dir + file for file in libvpx_sources_intrin_x86] -libvpx_sources_intrin_x86_mmx = [libvpx_dir + file for file in libvpx_sources_intrin_x86_mmx] -libvpx_sources_intrin_x86_sse2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_sse2] -libvpx_sources_intrin_x86_ssse3 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_ssse3] -libvpx_sources_intrin_x86_avx2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_avx2] -libvpx_sources_x86asm = [libvpx_dir + file for file in libvpx_sources_x86asm] -libvpx_sources_x86_64asm = [libvpx_dir + file for file in libvpx_sources_x86_64asm] -libvpx_sources_arm = [libvpx_dir + file for file in libvpx_sources_arm] -libvpx_sources_arm_neon = [libvpx_dir + file for file in libvpx_sources_arm_neon] -libvpx_sources_arm_neon_gas = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas] -libvpx_sources_arm_neon_armasm_ms = [libvpx_dir + file for file in libvpx_sources_arm_neon_armasm_ms] -libvpx_sources_arm_neon_gas_apple = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas_apple] - - -env_libvpx = env_modules.Clone() -env_libvpx.disable_warnings() -env_libvpx.Prepend(CPPPATH=[libvpx_dir]) - -webm_multithread = env["platform"] != "javascript" - -cpu_bits = env["bits"] -webm_cpu_x86 = False -webm_cpu_arm = False -if env["platform"] == "uwp": - if "arm" in env["PROGSUFFIX"]: - webm_cpu_arm = True - else: - webm_cpu_x86 = True -else: - import platform - - is_x11_or_server_arm = env["platform"] == "linuxbsd" and ( - platform.machine().startswith("arm") or platform.machine().startswith("aarch") - ) - is_macos_x86 = env["platform"] == "osx" and ("arch" in env and (env["arch"] != "arm64")) - is_ios_x86 = env["platform"] == "iphone" and ("arch" in env and env["arch"].startswith("x86")) - is_android_x86 = env["platform"] == "android" and env["android_arch"].startswith("x86") - if is_android_x86: - cpu_bits = "32" if env["android_arch"] == "x86" else "64" - webm_cpu_x86 = ( - not is_x11_or_server_arm - and (cpu_bits == "32" or cpu_bits == "64") - and ( - env["platform"] == "windows" - or env["platform"] == "linuxbsd" - or env["platform"] == "haiku" - or is_macos_x86 - or is_android_x86 - or is_ios_x86 - ) - ) - webm_cpu_arm = ( - is_x11_or_server_arm - or (not is_macos_x86 and env["platform"] == "osx") - or (not is_ios_x86 and env["platform"] == "iphone") - or (not is_android_x86 and env["platform"] == "android") - ) - -if webm_cpu_x86: - import subprocess - import os - - yasm_paths = [ - "yasm", - "../../../yasm", - ] - - yasm_found = False - - devnull = open(os.devnull) - for yasm_path in yasm_paths: - try: - yasm_found = True - subprocess.Popen([yasm_path, "--version"], stdout=devnull, stderr=devnull).communicate() - except Exception: - yasm_found = False - if yasm_found: - break - - if not yasm_found: - webm_cpu_x86 = False - print("YASM is necessary for WebM SIMD optimizations.") - -webm_simd_optimizations = False - -if webm_cpu_x86: - if env["platform"] == "windows" or env["platform"] == "uwp": - env_libvpx["ASFORMAT"] = "win" - elif env["platform"] == "osx" or env["platform"] == "iphone": - env_libvpx["ASFORMAT"] = "macho" - else: - env_libvpx["ASFORMAT"] = "elf" - env_libvpx["ASFORMAT"] += cpu_bits - - env_libvpx["AS"] = "yasm" - env_libvpx["ASFLAGS"] = "-I" + libvpx_dir[1:] + " -f $ASFORMAT -D $ASCPU" - env_libvpx["ASCOM"] = "$AS $ASFLAGS -o $TARGET $SOURCES" - - if cpu_bits == "32": - env_libvpx["ASCPU"] = "X86_32" - elif cpu_bits == "64": - env_libvpx["ASCPU"] = "X86_64" - - env_libvpx.Append(CPPDEFINES=["WEBM_X86ASM"]) - - webm_simd_optimizations = True - -if webm_cpu_arm: - if env["platform"] == "iphone": - env_libvpx["ASFLAGS"] = "-arch armv7" - elif env["platform"] == "android" and env["android_arch"] == "armv7" or env["platform"] == "linuxbsd": - env_libvpx["ASFLAGS"] = "-mfpu=neon" - elif env["platform"] == "uwp": - env_libvpx["AS"] = "armasm" - env_libvpx["ASFLAGS"] = "" - env_libvpx["ASCOM"] = "$AS $ASFLAGS -o $TARGET $SOURCES" - - env_libvpx.Append(CPPDEFINES=["WEBM_ARMASM"]) - - webm_simd_optimizations = True - -if webm_simd_optimizations == False: - print("WebM SIMD optimizations are disabled. Check if your CPU architecture, CPU bits or platform are supported!") - -env_libvpx.add_source_files(env.modules_sources, libvpx_sources) - -if webm_multithread: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_mt) - -if webm_cpu_x86: - is_clang_or_gcc = ( - ("gcc" in os.path.basename(env["CC"])) or ("clang" in os.path.basename(env["CC"])) or ("osxcross" in env) - ) - - env_libvpx_mmx = env_libvpx.Clone() - if cpu_bits == "32" and is_clang_or_gcc: - env_libvpx_mmx.Append(CCFLAGS=["-mmmx"]) - env_libvpx_mmx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_mmx) - - env_libvpx_sse2 = env_libvpx.Clone() - if cpu_bits == "32" and is_clang_or_gcc: - env_libvpx_sse2.Append(CCFLAGS=["-msse2"]) - env_libvpx_sse2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_sse2) - - env_libvpx_ssse3 = env_libvpx.Clone() - if is_clang_or_gcc: - env_libvpx_ssse3.Append(CCFLAGS=["-mssse3"]) - env_libvpx_ssse3.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_ssse3) - - env_libvpx_avx2 = env_libvpx.Clone() - if is_clang_or_gcc: - env_libvpx_avx2.Append(CCFLAGS=["-mavx2"]) - env_libvpx_avx2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_avx2) - - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86) - - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86asm) - if cpu_bits == "64": - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86_64asm) -elif webm_cpu_arm: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm) - if env["platform"] == "android": - env_libvpx.Prepend(CPPPATH=[libvpx_dir + "third_party/android"]) - env_libvpx.add_source_files(env.modules_sources, [libvpx_dir + "third_party/android/cpu-features.c"]) - - env_libvpx_neon = env_libvpx.Clone() - env_libvpx_neon.add_source_files(env.modules_sources, libvpx_sources_arm_neon) - - if env["platform"] == "uwp": - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_armasm_ms) - elif env["platform"] == "iphone": - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas_apple) - elif (is_x11_or_server_arm and cpu_bits == "32") or ( - env["platform"] == "android" and not env["android_arch"] == "arm64v8" - ): - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas) diff --git a/modules/webm/register_types.cpp b/modules/webm/register_types.cpp deleted file mode 100644 index 8f690a6892..0000000000 --- a/modules/webm/register_types.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/*************************************************************************/ -/* register_types.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "register_types.h" - -#include "video_stream_webm.h" - -static Ref resource_loader_webm; - -void register_webm_types() { - resource_loader_webm.instantiate(); - ResourceLoader::add_resource_format_loader(resource_loader_webm, true); - - GDREGISTER_CLASS(VideoStreamWebm); -} - -void unregister_webm_types() { - ResourceLoader::remove_resource_format_loader(resource_loader_webm); - resource_loader_webm.unref(); -} diff --git a/modules/webm/register_types.h b/modules/webm/register_types.h deleted file mode 100644 index d090fe745b..0000000000 --- a/modules/webm/register_types.h +++ /dev/null @@ -1,37 +0,0 @@ -/*************************************************************************/ -/* register_types.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef WEBM_REGISTER_TYPES_H -#define WEBM_REGISTER_TYPES_H - -void register_webm_types(); -void unregister_webm_types(); - -#endif // WEBM_REGISTER_TYPES_H diff --git a/modules/webm/video_stream_webm.cpp b/modules/webm/video_stream_webm.cpp deleted file mode 100644 index 187a27b6c2..0000000000 --- a/modules/webm/video_stream_webm.cpp +++ /dev/null @@ -1,469 +0,0 @@ -/*************************************************************************/ -/* video_stream_webm.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "video_stream_webm.h" - -#include "core/config/project_settings.h" -#include "core/io/file_access.h" -#include "core/os/os.h" -#include "servers/audio_server.h" - -#include "thirdparty/misc/yuv2rgb.h" - -// libsimplewebm -#include -#include - -// libvpx -#include - -// libwebm -#include - -class MkvReader : public mkvparser::IMkvReader { -public: - MkvReader(const String &p_file) { - file = FileAccess::open(p_file, FileAccess::READ); - - ERR_FAIL_COND_MSG(!file, "Failed loading resource: '" + p_file + "'."); - } - ~MkvReader() { - if (file) { - memdelete(file); - } - } - - virtual int Read(long long pos, long len, unsigned char *buf) { - if (file) { - if (file->get_position() != (uint64_t)pos) { - file->seek(pos); - } - if (file->get_buffer(buf, len) == (uint64_t)len) { - return 0; - } - } - return -1; - } - - virtual int Length(long long *total, long long *available) { - if (file) { - const uint64_t len = file->get_length(); - if (total) { - *total = len; - } - if (available) { - *available = len; - } - return 0; - } - return -1; - } - -private: - FileAccess *file; -}; - -/**/ - -VideoStreamPlaybackWebm::VideoStreamPlaybackWebm() : - - texture(memnew(ImageTexture)) {} -VideoStreamPlaybackWebm::~VideoStreamPlaybackWebm() { - delete_pointers(); -} - -bool VideoStreamPlaybackWebm::open_file(const String &p_file) { - file_name = p_file; - webm = memnew(WebMDemuxer(new MkvReader(file_name), 0, audio_track)); - if (webm->isOpen()) { - video = memnew(VPXDecoder(*webm, OS::get_singleton()->get_processor_count())); - if (video->isOpen()) { - audio = memnew(OpusVorbisDecoder(*webm)); - if (audio->isOpen()) { - audio_frame = memnew(WebMFrame); - pcm = (float *)memalloc(sizeof(float) * audio->getBufferSamples() * webm->getChannels()); - } else { - memdelete(audio); - audio = nullptr; - } - - frame_data.resize((webm->getWidth() * webm->getHeight()) << 2); - Ref img; - img.instantiate(); - img->create(webm->getWidth(), webm->getHeight(), false, Image::FORMAT_RGBA8); - texture->create_from_image(img); - - return true; - } - memdelete(video); - video = nullptr; - } - memdelete(webm); - webm = nullptr; - return false; -} - -void VideoStreamPlaybackWebm::stop() { - if (playing) { - delete_pointers(); - - pcm = nullptr; - - audio_frame = nullptr; - video_frames = nullptr; - - video = nullptr; - audio = nullptr; - - open_file(file_name); //Should not fail here... - - video_frames_capacity = video_frames_pos = 0; - num_decoded_samples = 0; - samples_offset = -1; - video_frame_delay = video_pos = 0.0; - } - time = 0.0; - playing = false; -} - -void VideoStreamPlaybackWebm::play() { - stop(); - - delay_compensation = ProjectSettings::get_singleton()->get("audio/video/video_delay_compensation_ms"); - delay_compensation /= 1000.0; - - playing = true; -} - -bool VideoStreamPlaybackWebm::is_playing() const { - return playing; -} - -void VideoStreamPlaybackWebm::set_paused(bool p_paused) { - paused = p_paused; -} - -bool VideoStreamPlaybackWebm::is_paused() const { - return paused; -} - -void VideoStreamPlaybackWebm::set_loop(bool p_enable) { - //Empty -} - -bool VideoStreamPlaybackWebm::has_loop() const { - return false; -} - -float VideoStreamPlaybackWebm::get_length() const { - if (webm) { - return webm->getLength(); - } - return 0.0f; -} - -float VideoStreamPlaybackWebm::get_playback_position() const { - return video_pos; -} - -void VideoStreamPlaybackWebm::seek(float p_time) { - WARN_PRINT_ONCE("Seeking in Theora and WebM videos is not implemented yet (it's only supported for GDNative-provided video streams)."); -} - -void VideoStreamPlaybackWebm::set_audio_track(int p_idx) { - audio_track = p_idx; -} - -Ref VideoStreamPlaybackWebm::get_texture() const { - return texture; -} - -void VideoStreamPlaybackWebm::update(float p_delta) { - if ((!playing || paused) || !video) { - return; - } - - time += p_delta; - - if (time < video_pos) { - return; - } - - bool audio_buffer_full = false; - - if (samples_offset > -1) { - //Mix remaining samples - const int to_read = num_decoded_samples - samples_offset; - const int mixed = mix_callback(mix_udata, pcm + samples_offset * webm->getChannels(), to_read); - if (mixed != to_read) { - samples_offset += mixed; - audio_buffer_full = true; - } else { - samples_offset = -1; - } - } - - const bool hasAudio = (audio && mix_callback); - while ((hasAudio && !audio_buffer_full && !has_enough_video_frames()) || - (!hasAudio && video_frames_pos == 0)) { - if (hasAudio && !audio_buffer_full && audio_frame->isValid() && - audio->getPCMF(*audio_frame, pcm, num_decoded_samples) && num_decoded_samples > 0) { - const int mixed = mix_callback(mix_udata, pcm, num_decoded_samples); - - if (mixed != num_decoded_samples) { - samples_offset = mixed; - audio_buffer_full = true; - } - } - - WebMFrame *video_frame; - if (video_frames_pos >= video_frames_capacity) { - WebMFrame **video_frames_new = (WebMFrame **)memrealloc(video_frames, ++video_frames_capacity * sizeof(void *)); - ERR_FAIL_COND(!video_frames_new); //Out of memory - (video_frames = video_frames_new)[video_frames_capacity - 1] = memnew(WebMFrame); - } - video_frame = video_frames[video_frames_pos]; - - if (!webm->readFrame(video_frame, audio_frame)) { //This will invalidate frames - break; //Can't demux, EOS? - } - - if (video_frame->isValid()) { - ++video_frames_pos; - } - }; - - bool video_frame_done = false; - while (video_frames_pos > 0 && !video_frame_done) { - WebMFrame *video_frame = video_frames[0]; - - // It seems VPXDecoder::decode has to be executed even though we might skip this frame - if (video->decode(*video_frame)) { - VPXDecoder::IMAGE_ERROR err; - VPXDecoder::Image image; - - if (should_process(*video_frame)) { - if ((err = video->getImage(image)) != VPXDecoder::NO_FRAME) { - if (err == VPXDecoder::NO_ERROR && image.w == webm->getWidth() && image.h == webm->getHeight()) { - uint8_t *w = frame_data.ptrw(); - bool converted = false; - - if (image.chromaShiftW == 0 && image.chromaShiftH == 0 && image.cs == VPX_CS_SRGB) { - uint8_t *wp = w; - unsigned char *rRow = image.planes[2]; - unsigned char *gRow = image.planes[0]; - unsigned char *bRow = image.planes[1]; - for (int i = 0; i < image.h; i++) { - for (int j = 0; j < image.w; j++) { - *wp++ = rRow[j]; - *wp++ = gRow[j]; - *wp++ = bRow[j]; - *wp++ = 255; - } - rRow += image.linesize[2]; - gRow += image.linesize[0]; - bRow += image.linesize[1]; - } - converted = true; - } else if (image.chromaShiftW == 1 && image.chromaShiftH == 1) { - yuv420_2_rgb8888(w, image.planes[0], image.planes[1], image.planes[2], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2); - //libyuv::I420ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - converted = true; - } else if (image.chromaShiftW == 1 && image.chromaShiftH == 0) { - yuv422_2_rgb8888(w, image.planes[0], image.planes[1], image.planes[2], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2); - //libyuv::I422ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - converted = true; - } else if (image.chromaShiftW == 0 && image.chromaShiftH == 0) { - yuv444_2_rgb8888(w, image.planes[0], image.planes[1], image.planes[2], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2); - //libyuv::I444ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - converted = true; - } else if (image.chromaShiftW == 2 && image.chromaShiftH == 0) { - //libyuv::I411ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2] image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - //converted = true; - } - - if (converted) { - Ref img = memnew(Image(image.w, image.h, 0, Image::FORMAT_RGBA8, frame_data)); - texture->update(img); //Zero copy send to rendering server - video_frame_done = true; - } - } - } - } - } - - video_pos = video_frame->time; - memmove(video_frames, video_frames + 1, (--video_frames_pos) * sizeof(void *)); - video_frames[video_frames_pos] = video_frame; - } - - if (video_frames_pos == 0 && webm->isEOS()) { - stop(); - } -} - -void VideoStreamPlaybackWebm::set_mix_callback(VideoStreamPlayback::AudioMixCallback p_callback, void *p_userdata) { - mix_callback = p_callback; - mix_udata = p_userdata; -} - -int VideoStreamPlaybackWebm::get_channels() const { - if (audio) { - return webm->getChannels(); - } - return 0; -} - -int VideoStreamPlaybackWebm::get_mix_rate() const { - if (audio) { - return webm->getSampleRate(); - } - return 0; -} - -inline bool VideoStreamPlaybackWebm::has_enough_video_frames() const { - if (video_frames_pos > 0) { - // FIXME: AudioServer output latency was fixed in af9bb0e, previously it used to - // systematically return 0. Now that it gives a proper latency, it broke this - // code where the delay compensation likely never really worked. - //const double audio_delay = AudioServer::get_singleton()->get_output_latency(); - const double video_time = video_frames[video_frames_pos - 1]->time; - return video_time >= time + /* audio_delay + */ delay_compensation; - } - return false; -} - -bool VideoStreamPlaybackWebm::should_process(WebMFrame &video_frame) { - // FIXME: AudioServer output latency was fixed in af9bb0e, previously it used to - // systematically return 0. Now that it gives a proper latency, it broke this - // code where the delay compensation likely never really worked. - //const double audio_delay = AudioServer::get_singleton()->get_output_latency(); - return video_frame.time >= time + /* audio_delay + */ delay_compensation; -} - -void VideoStreamPlaybackWebm::delete_pointers() { - if (pcm) { - memfree(pcm); - } - - if (audio_frame) { - memdelete(audio_frame); - } - if (video_frames) { - for (int i = 0; i < video_frames_capacity; ++i) { - memdelete(video_frames[i]); - } - memfree(video_frames); - } - - if (video) { - memdelete(video); - } - if (audio) { - memdelete(audio); - } - - if (webm) { - memdelete(webm); - } -} - -/**/ - -VideoStreamWebm::VideoStreamWebm() {} - -Ref VideoStreamWebm::instance_playback() { - Ref pb = memnew(VideoStreamPlaybackWebm); - pb->set_audio_track(audio_track); - if (pb->open_file(file)) { - return pb; - } - return nullptr; -} - -void VideoStreamWebm::set_file(const String &p_file) { - file = p_file; -} - -String VideoStreamWebm::get_file() { - return file; -} - -void VideoStreamWebm::_bind_methods() { - ClassDB::bind_method(D_METHOD("set_file", "file"), &VideoStreamWebm::set_file); - ClassDB::bind_method(D_METHOD("get_file"), &VideoStreamWebm::get_file); - - ADD_PROPERTY(PropertyInfo(Variant::STRING, "file", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "set_file", "get_file"); -} - -void VideoStreamWebm::set_audio_track(int p_track) { - audio_track = p_track; -} - -//////////// - -RES ResourceFormatLoaderWebm::load(const String &p_path, const String &p_original_path, Error *r_error, bool p_use_sub_threads, float *r_progress, CacheMode p_cache_mode) { - FileAccess *f = FileAccess::open(p_path, FileAccess::READ); - if (!f) { - if (r_error) { - *r_error = ERR_CANT_OPEN; - } - return RES(); - } - - VideoStreamWebm *stream = memnew(VideoStreamWebm); - stream->set_file(p_path); - - Ref webm_stream = Ref(stream); - - if (r_error) { - *r_error = OK; - } - - f->close(); - memdelete(f); - return webm_stream; -} - -void ResourceFormatLoaderWebm::get_recognized_extensions(List *p_extensions) const { - p_extensions->push_back("webm"); -} - -bool ResourceFormatLoaderWebm::handles_type(const String &p_type) const { - return ClassDB::is_parent_class(p_type, "VideoStream"); -} - -String ResourceFormatLoaderWebm::get_resource_type(const String &p_path) const { - String el = p_path.get_extension().to_lower(); - if (el == "webm") { - return "VideoStreamWebm"; - } - return ""; -} diff --git a/modules/webm/video_stream_webm.h b/modules/webm/video_stream_webm.h deleted file mode 100644 index 60e02ab38b..0000000000 --- a/modules/webm/video_stream_webm.h +++ /dev/null @@ -1,135 +0,0 @@ -/*************************************************************************/ -/* video_stream_webm.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef VIDEO_STREAM_WEBM_H -#define VIDEO_STREAM_WEBM_H - -#include "core/io/resource_loader.h" -#include "scene/resources/video_stream.h" - -class WebMFrame; -class WebMDemuxer; -class VPXDecoder; -class OpusVorbisDecoder; - -class VideoStreamPlaybackWebm : public VideoStreamPlayback { - GDCLASS(VideoStreamPlaybackWebm, VideoStreamPlayback); - - String file_name; - int audio_track = 0; - - WebMDemuxer *webm = nullptr; - VPXDecoder *video = nullptr; - OpusVorbisDecoder *audio = nullptr; - - WebMFrame **video_frames = nullptr, *audio_frame = nullptr; - int video_frames_pos = 0, video_frames_capacity = 0; - - int num_decoded_samples = 0, samples_offset = -1; - AudioMixCallback mix_callback = nullptr; - void *mix_udata = nullptr; - - bool playing = false, paused = false; - double delay_compensation = 0.0; - double time = 0.0, video_frame_delay = 0.0, video_pos = 0.0; - - Vector frame_data; - Ref texture; - - float *pcm = nullptr; - -public: - VideoStreamPlaybackWebm(); - ~VideoStreamPlaybackWebm(); - - bool open_file(const String &p_file); - - virtual void stop() override; - virtual void play() override; - - virtual bool is_playing() const override; - - virtual void set_paused(bool p_paused) override; - virtual bool is_paused() const override; - - virtual void set_loop(bool p_enable) override; - virtual bool has_loop() const override; - - virtual float get_length() const override; - - virtual float get_playback_position() const override; - virtual void seek(float p_time) override; - - virtual void set_audio_track(int p_idx) override; - - virtual Ref get_texture() const override; - virtual void update(float p_delta) override; - - virtual void set_mix_callback(AudioMixCallback p_callback, void *p_userdata) override; - virtual int get_channels() const override; - virtual int get_mix_rate() const override; - -private: - inline bool has_enough_video_frames() const; - bool should_process(WebMFrame &video_frame); - - void delete_pointers(); -}; - -/**/ - -class VideoStreamWebm : public VideoStream { - GDCLASS(VideoStreamWebm, VideoStream); - - String file; - int audio_track = 0; - -protected: - static void _bind_methods(); - -public: - VideoStreamWebm(); - - virtual Ref instance_playback() override; - - virtual void set_file(const String &p_file); - String get_file(); - virtual void set_audio_track(int p_track) override; -}; - -class ResourceFormatLoaderWebm : public ResourceFormatLoader { -public: - virtual RES load(const String &p_path, const String &p_original_path = "", Error *r_error = nullptr, bool p_use_sub_threads = false, float *r_progress = nullptr, CacheMode p_cache_mode = CACHE_MODE_REUSE); - virtual void get_recognized_extensions(List *p_extensions) const; - virtual bool handles_type(const String &p_type) const; - virtual String get_resource_type(const String &p_path) const; -}; - -#endif // VIDEO_STREAM_WEBM_H diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index afb7c7b2ab..7ce0f77c51 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -291,17 +291,10 @@ def configure(env): if any(platform.machine() in s for s in list_of_x86): env["x86_libtheora_opt_gcc"] = True - if not env["builtin_libvpx"]: - env.ParseConfig("pkg-config vpx --cflags --libs") - if not env["builtin_libvorbis"]: env["builtin_libogg"] = False # Needed to link against system libvorbis env.ParseConfig("pkg-config vorbis vorbisfile --cflags --libs") - if not env["builtin_opus"]: - env["builtin_libogg"] = False # Needed to link against system opus - env.ParseConfig("pkg-config opus opusfile --cflags --libs") - if not env["builtin_libogg"]: env.ParseConfig("pkg-config ogg --cflags --libs") diff --git a/platform/osx/detect.py b/platform/osx/detect.py index 10cf2b591e..83523dde66 100644 --- a/platform/osx/detect.py +++ b/platform/osx/detect.py @@ -97,7 +97,6 @@ def configure(env): env["AR"] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ar" env["RANLIB"] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib" env["AS"] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as" - env.Append(CPPDEFINES=["__MACPORTS__"]) # hack to fix libvpx MM256_BROADCASTSI128_SI256 define else: env["CC"] = "clang" env["CXX"] = "clang++" @@ -125,7 +124,6 @@ def configure(env): env["AR"] = basecmd + "ar" env["RANLIB"] = basecmd + "ranlib" env["AS"] = basecmd + "as" - env.Append(CPPDEFINES=["__MACPORTS__"]) # hack to fix libvpx MM256_BROADCASTSI128_SI256 define if env["use_ubsan"] or env["use_asan"] or env["use_tsan"]: env.extra_suffix += "s" diff --git a/scene/gui/video_player.cpp b/scene/gui/video_player.cpp index 8734037a57..989aabc549 100644 --- a/scene/gui/video_player.cpp +++ b/scene/gui/video_player.cpp @@ -29,9 +29,9 @@ /*************************************************************************/ #include "video_player.h" -#include "scene/scene_string_names.h" #include "core/os/os.h" +#include "scene/scene_string_names.h" #include "servers/audio_server.h" int VideoPlayer::sp_get_channel_count() const { @@ -55,7 +55,7 @@ bool VideoPlayer::mix(AudioFrame *p_buffer, int p_frames) { return false; } -// Called from main thread (eg VideoStreamPlaybackWebm::update) +// Called from main thread (e.g. VideoStreamPlaybackTheora::update). int VideoPlayer::_audio_mix_callback(void *p_udata, const float *p_data, int p_frames) { ERR_FAIL_NULL_V(p_udata, 0); ERR_FAIL_NULL_V(p_data, 0); diff --git a/thirdparty/README.md b/thirdparty/README.md index 157622a2d1..964ac6246e 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -255,25 +255,6 @@ Files extracted from upstream source: - `LICENSE` -## libsimplewebm - -- Upstream: https://github.com/zaps166/libsimplewebm -- Version: git (fe57fd3cfe6c0af4c6af110b1f84a90cf191d943, 2019) -- License: MIT (main), BSD-3-Clause (libwebm) - -This contains libwebm, but the version in use is updated from the one used by libsimplewebm, -and may have *unmarked* alterations from that. - -Files extracted from upstream source: - -- all the .cpp, .hpp files in the main folder except `example.cpp` -- LICENSE - -Important: Some files have Godot-made changes. -They are marked with `// -- GODOT start --` and `// -- GODOT end --` -comments. - - ## libtheora - Upstream: https://www.theora.org @@ -303,23 +284,6 @@ Files extracted from upstream source: - COPYING -## libvpx - -- Upstream: https://chromium.googlesource.com/webm/libvpx/ -- Version: 1.6.0 (2016) -- License: BSD-3-Clause - -Files extracted from upstream source: - -TODO. - -Important: File `libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c` has -Godot-made change marked with `// -- GODOT --` comments. - -The files `libvpx/third_party/android/cpu-features.{c,h}` were copied -from the Android NDK r18. - - ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ @@ -531,23 +495,6 @@ Patch files are provided in `oidn/patches/`. - scripts/resource_to_cpp.py (used in modules/denoise/resource_to_cpp.py) -## opus - -- Upstream: https://opus-codec.org -- Version: 1.1.5 (opus) and 0.8 (opusfile) (2017) -- License: BSD-3-Clause - -Files extracted from upstream source: - -- all .c and .h files in src/ (both opus and opusfile) -- all .h files in include/ (both opus and opusfile) as opus/ -- remove unused `opus_demo.c`, -- remove `http.c`, `wincerts.c` and `winerrno.h` (part of - unused libopusurl) -- celt/ and silk/ subfolders -- COPYING - - ## pcre2 - Upstream: http://www.pcre.org diff --git a/thirdparty/libsimplewebm/LICENSE b/thirdparty/libsimplewebm/LICENSE deleted file mode 100644 index 058633ac18..0000000000 --- a/thirdparty/libsimplewebm/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2016 Błażej Szczygieł - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp b/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp deleted file mode 100644 index b5824b17be..0000000000 --- a/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#include "OpusVorbisDecoder.hpp" - -#include -#include - -#include - -struct VorbisDecoder -{ - vorbis_info info; - vorbis_dsp_state dspState; - vorbis_block block; - ogg_packet op; - - bool hasDSPState, hasBlock; -}; - -/**/ - -OpusVorbisDecoder::OpusVorbisDecoder(const WebMDemuxer &demuxer) : - m_vorbis(NULL), m_opus(NULL), - m_numSamples(0) -{ - switch (demuxer.getAudioCodec()) - { - case WebMDemuxer::AUDIO_VORBIS: - m_channels = demuxer.getChannels(); - if (openVorbis(demuxer)) - return; - break; - case WebMDemuxer::AUDIO_OPUS: - m_channels = demuxer.getChannels(); - if (openOpus(demuxer)) - return; - break; - default: - return; - } - close(); -} -OpusVorbisDecoder::~OpusVorbisDecoder() -{ - close(); -} - -bool OpusVorbisDecoder::isOpen() const -{ - return (m_vorbis || m_opus); -} - -bool OpusVorbisDecoder::getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples) -{ - if (m_vorbis) - { - m_vorbis->op.packet = frame.buffer; - m_vorbis->op.bytes = frame.bufferSize; - - if (vorbis_synthesis(&m_vorbis->block, &m_vorbis->op)) - return false; - if (vorbis_synthesis_blockin(&m_vorbis->dspState, &m_vorbis->block)) - return false; - - const int maxSamples = getBufferSamples(); - int samplesCount, count = 0; - float **pcm; - while ((samplesCount = vorbis_synthesis_pcmout(&m_vorbis->dspState, &pcm))) - { - const int toConvert = samplesCount <= maxSamples ? samplesCount : maxSamples; - for (int c = 0; c < m_channels; ++c) - { - float *samples = pcm[c]; - for (int i = 0, j = c; i < toConvert; ++i, j += m_channels) - { - int sample = samples[i] * 32767.0f; - if (sample > 32767) - sample = 32767; - else if (sample < -32768) - sample = -32768; - buffer[count + j] = sample; - } - } - vorbis_synthesis_read(&m_vorbis->dspState, toConvert); - count += toConvert; - } - - numOutSamples = count; - return true; - } - else if (m_opus) - { - const int samples = opus_decode(m_opus, frame.buffer, frame.bufferSize, buffer, m_numSamples, 0); - if (samples >= 0) - { - numOutSamples = samples; - return true; - } - } - return false; -} - -// -- GODOT begin -- -bool OpusVorbisDecoder::getPCMF(WebMFrame &frame, float *buffer, int &numOutSamples) { - if (m_vorbis) { - m_vorbis->op.packet = frame.buffer; - m_vorbis->op.bytes = frame.bufferSize; - - if (vorbis_synthesis(&m_vorbis->block, &m_vorbis->op)) - return false; - if (vorbis_synthesis_blockin(&m_vorbis->dspState, &m_vorbis->block)) - return false; - - const int maxSamples = getBufferSamples(); - int samplesCount, count = 0; - float **pcm; - while ((samplesCount = vorbis_synthesis_pcmout(&m_vorbis->dspState, &pcm))) { - const int toConvert = samplesCount <= maxSamples ? samplesCount : maxSamples; - for (int c = 0; c < m_channels; ++c) { - float *samples = pcm[c]; - for (int i = 0, j = c; i < toConvert; ++i, j += m_channels) { - buffer[count + j] = samples[i]; - } - } - vorbis_synthesis_read(&m_vorbis->dspState, toConvert); - count += toConvert; - } - - numOutSamples = count; - return true; - } else if (m_opus) { - const int samples = opus_decode_float(m_opus, frame.buffer, frame.bufferSize, buffer, m_numSamples, 0); - if (samples >= 0) { - numOutSamples = samples; - return true; - } - } - return false; -} -// -- GODOT end -- - -bool OpusVorbisDecoder::openVorbis(const WebMDemuxer &demuxer) -{ - size_t extradataSize = 0; - const unsigned char *extradata = demuxer.getAudioExtradata(extradataSize); - - if (extradataSize < 3 || !extradata || extradata[0] != 2) - return false; - - size_t headerSize[3] = {0}; - size_t offset = 1; - - /* Calculate three headers sizes */ - for (int i = 0; i < 2; ++i) - { - for (;;) - { - if (offset >= extradataSize) - return false; - headerSize[i] += extradata[offset]; - if (extradata[offset++] < 0xFF) - break; - } - } - headerSize[2] = extradataSize - (headerSize[0] + headerSize[1] + offset); - - if (headerSize[0] + headerSize[1] + headerSize[2] + offset != extradataSize) - return false; - - ogg_packet op[3]; - memset(op, 0, sizeof op); - - op[0].packet = (unsigned char *)extradata + offset; - op[0].bytes = headerSize[0]; - op[0].b_o_s = 1; - - op[1].packet = (unsigned char *)extradata + offset + headerSize[0]; - op[1].bytes = headerSize[1]; - - op[2].packet = (unsigned char *)extradata + offset + headerSize[0] + headerSize[1]; - op[2].bytes = headerSize[2]; - - m_vorbis = new VorbisDecoder; - m_vorbis->hasDSPState = m_vorbis->hasBlock = false; - vorbis_info_init(&m_vorbis->info); - - /* Upload three Vorbis headers into libvorbis */ - vorbis_comment vc; - vorbis_comment_init(&vc); - for (int i = 0; i < 3; ++i) - { - if (vorbis_synthesis_headerin(&m_vorbis->info, &vc, &op[i])) - { - vorbis_comment_clear(&vc); - return false; - } - } - vorbis_comment_clear(&vc); - - if (vorbis_synthesis_init(&m_vorbis->dspState, &m_vorbis->info)) - return false; - m_vorbis->hasDSPState = true; - - if (m_vorbis->info.channels != m_channels || m_vorbis->info.rate != demuxer.getSampleRate()) - return false; - - if (vorbis_block_init(&m_vorbis->dspState, &m_vorbis->block)) - return false; - m_vorbis->hasBlock = true; - - memset(&m_vorbis->op, 0, sizeof m_vorbis->op); - - m_numSamples = 4096 / m_channels; - - return true; -} -bool OpusVorbisDecoder::openOpus(const WebMDemuxer &demuxer) -{ - int opusErr = 0; - m_opus = opus_decoder_create(demuxer.getSampleRate(), m_channels, &opusErr); - if (!opusErr) - { - m_numSamples = demuxer.getSampleRate() * 0.06 + 0.5; //Maximum frame size (for 60 ms frame) - return true; - } - return false; -} - -void OpusVorbisDecoder::close() -{ - if (m_vorbis) - { - if (m_vorbis->hasBlock) - vorbis_block_clear(&m_vorbis->block); - if (m_vorbis->hasDSPState) - vorbis_dsp_clear(&m_vorbis->dspState); - vorbis_info_clear(&m_vorbis->info); - delete m_vorbis; - } - if (m_opus) - opus_decoder_destroy(m_opus); -} diff --git a/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp b/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp deleted file mode 100644 index f285b3fbd6..0000000000 --- a/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef OPUSVORBISDECODER_HPP -#define OPUSVORBISDECODER_HPP - -#include "WebMDemuxer.hpp" - -struct VorbisDecoder; -struct OpusDecoder; - -class OpusVorbisDecoder -{ - OpusVorbisDecoder(const OpusVorbisDecoder &); - void operator =(const OpusVorbisDecoder &); -public: - OpusVorbisDecoder(const WebMDemuxer &demuxer); - ~OpusVorbisDecoder(); - - bool isOpen() const; - - inline int getBufferSamples() const - { - return m_numSamples; - } - bool getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples); -// -- GODOT begin -- - bool getPCMF(WebMFrame &frame, float *buffer, int &numOutSamples); -// -- GODOT end -- - -private: - bool openVorbis(const WebMDemuxer &demuxer); - bool openOpus(const WebMDemuxer &demuxer); - - void close(); - - VorbisDecoder *m_vorbis; - OpusDecoder *m_opus; - int m_numSamples; - int m_channels; - -}; - -#endif // OPUSVORBISDECODER_HPP diff --git a/thirdparty/libsimplewebm/VPXDecoder.cpp b/thirdparty/libsimplewebm/VPXDecoder.cpp deleted file mode 100644 index e2606f83ba..0000000000 --- a/thirdparty/libsimplewebm/VPXDecoder.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#include "VPXDecoder.hpp" - -#include -#include - -#include -#include - -VPXDecoder::VPXDecoder(const WebMDemuxer &demuxer, unsigned threads) : - m_ctx(NULL), - m_iter(NULL), - m_delay(0), - m_last_space(VPX_CS_UNKNOWN) -{ - if (threads > 8) - threads = 8; - else if (threads < 1) - threads = 1; - - const vpx_codec_dec_cfg_t codecCfg = { - threads, - 0, - 0 - }; - vpx_codec_iface_t *codecIface = NULL; - - switch (demuxer.getVideoCodec()) - { - case WebMDemuxer::VIDEO_VP8: - codecIface = vpx_codec_vp8_dx(); - break; - case WebMDemuxer::VIDEO_VP9: - codecIface = vpx_codec_vp9_dx(); - m_delay = threads - 1; - break; - default: - return; - } - - m_ctx = new vpx_codec_ctx_t; - if (vpx_codec_dec_init(m_ctx, codecIface, &codecCfg, m_delay > 0 ? VPX_CODEC_USE_FRAME_THREADING : 0)) - { - delete m_ctx; - m_ctx = NULL; - } -} -VPXDecoder::~VPXDecoder() -{ - if (m_ctx) - { - vpx_codec_destroy(m_ctx); - delete m_ctx; - } -} - -bool VPXDecoder::decode(const WebMFrame &frame) -{ - m_iter = NULL; - return !vpx_codec_decode(m_ctx, frame.buffer, frame.bufferSize, NULL, 0); -} -VPXDecoder::IMAGE_ERROR VPXDecoder::getImage(Image &image) -{ - IMAGE_ERROR err = NO_FRAME; - if (vpx_image_t *img = vpx_codec_get_frame(m_ctx, &m_iter)) - { - // It seems to be a common problem that UNKNOWN comes up a lot, yet FFMPEG is somehow getting accurate colour-space information. - // After checking FFMPEG code, *they're* getting colour-space information, so I'm assuming something like this is going on. - // It appears to work, at least. - if (img->cs != VPX_CS_UNKNOWN) - m_last_space = img->cs; - if ((img->fmt & VPX_IMG_FMT_PLANAR) && !(img->fmt & (VPX_IMG_FMT_HAS_ALPHA | VPX_IMG_FMT_HIGHBITDEPTH))) - { - if (img->stride[0] && img->stride[1] && img->stride[2]) - { - const int uPlane = !!(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1; - const int vPlane = !(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1; - - image.w = img->d_w; - image.h = img->d_h; - image.cs = m_last_space; - image.chromaShiftW = img->x_chroma_shift; - image.chromaShiftH = img->y_chroma_shift; - - image.planes[0] = img->planes[0]; - image.planes[1] = img->planes[uPlane]; - image.planes[2] = img->planes[vPlane]; - - image.linesize[0] = img->stride[0]; - image.linesize[1] = img->stride[uPlane]; - image.linesize[2] = img->stride[vPlane]; - - err = NO_ERROR; - } - } - else - { - err = UNSUPPORTED_FRAME; - } - } - return err; -} - -/**/ - -// -- GODOT begin -- -#if 0 -// -- GODOT end -- - -static inline int ceilRshift(int val, int shift) -{ - return (val + (1 << shift) - 1) >> shift; -} - -int VPXDecoder::Image::getWidth(int plane) const -{ - if (!plane) - return w; - return ceilRshift(w, chromaShiftW); -} -int VPXDecoder::Image::getHeight(int plane) const -{ - if (!plane) - return h; - return ceilRshift(h, chromaShiftH); -} - -// -- GODOT begin -- -#endif -// -- GODOT end -- - diff --git a/thirdparty/libsimplewebm/VPXDecoder.hpp b/thirdparty/libsimplewebm/VPXDecoder.hpp deleted file mode 100644 index 5071b069cb..0000000000 --- a/thirdparty/libsimplewebm/VPXDecoder.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef VPXDECODER_HPP -#define VPXDECODER_HPP - -#include "WebMDemuxer.hpp" - -struct vpx_codec_ctx; - -class VPXDecoder -{ - VPXDecoder(const VPXDecoder &); - void operator =(const VPXDecoder &); -public: - class Image - { - public: -// -- GODOT begin -- -#if 0 -// -- GODOT end -- - int getWidth(int plane) const; - int getHeight(int plane) const; -// -- GODOT begin -- -#endif -// -- GODOT end -- - - int w, h; - int cs; - int chromaShiftW, chromaShiftH; - unsigned char *planes[3]; - int linesize[3]; - }; - - enum IMAGE_ERROR - { - UNSUPPORTED_FRAME = -1, - NO_ERROR, - NO_FRAME - }; - - VPXDecoder(const WebMDemuxer &demuxer, unsigned threads = 1); - ~VPXDecoder(); - - inline bool isOpen() const - { - return (bool)m_ctx; - } - - inline int getFramesDelay() const - { - return m_delay; - } - - bool decode(const WebMFrame &frame); - IMAGE_ERROR getImage(Image &image); //The data is NOT copied! Only 3-plane, 8-bit images are supported. - -private: - vpx_codec_ctx *m_ctx; - const void *m_iter; - int m_delay; - int m_last_space; -}; - -#endif // VPXDECODER_HPP diff --git a/thirdparty/libsimplewebm/WebMDemuxer.cpp b/thirdparty/libsimplewebm/WebMDemuxer.cpp deleted file mode 100644 index cb63deccd5..0000000000 --- a/thirdparty/libsimplewebm/WebMDemuxer.cpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#include "WebMDemuxer.hpp" - -#include "mkvparser/mkvparser.h" - -#include -#include -#include - -WebMFrame::WebMFrame() : - bufferSize(0), bufferCapacity(0), - buffer(NULL), - time(0), - key(false) -{} -WebMFrame::~WebMFrame() -{ - free(buffer); -} - -/**/ - -WebMDemuxer::WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack, int audioTrack) : - m_reader(reader), - m_segment(NULL), - m_cluster(NULL), m_block(NULL), m_blockEntry(NULL), - m_blockFrameIndex(0), - m_videoTrack(NULL), m_vCodec(NO_VIDEO), - m_audioTrack(NULL), m_aCodec(NO_AUDIO), - m_isOpen(false), - m_eos(false) -{ - long long pos = 0; - if (mkvparser::EBMLHeader().Parse(m_reader, pos)) - return; - - if (mkvparser::Segment::CreateInstance(m_reader, pos, m_segment)) - return; - - if (m_segment->Load() < 0) - return; - - const mkvparser::Tracks *tracks = m_segment->GetTracks(); - const unsigned long tracksCount = tracks->GetTracksCount(); - int currVideoTrack = -1, currAudioTrack = -1; - for (unsigned long i = 0; i < tracksCount; ++i) - { - const mkvparser::Track *track = tracks->GetTrackByIndex(i); - if (const char *codecId = track->GetCodecId()) - { - if ((!m_videoTrack || currVideoTrack != videoTrack) && track->GetType() == mkvparser::Track::kVideo) - { - if (!strcmp(codecId, "V_VP8")) - m_vCodec = VIDEO_VP8; - else if (!strcmp(codecId, "V_VP9")) - m_vCodec = VIDEO_VP9; - if (m_vCodec != NO_VIDEO) - m_videoTrack = static_cast(track); - ++currVideoTrack; - } - if ((!m_audioTrack || currAudioTrack != audioTrack) && track->GetType() == mkvparser::Track::kAudio) - { - if (!strcmp(codecId, "A_VORBIS")) - m_aCodec = AUDIO_VORBIS; - else if (!strcmp(codecId, "A_OPUS")) - m_aCodec = AUDIO_OPUS; - if (m_aCodec != NO_AUDIO) - m_audioTrack = static_cast(track); - ++currAudioTrack; - } - } - } - if (!m_videoTrack && !m_audioTrack) - return; - - m_isOpen = true; -} -WebMDemuxer::~WebMDemuxer() -{ - delete m_segment; - delete m_reader; -} - -double WebMDemuxer::getLength() const -{ - return m_segment->GetDuration() / 1e9; -} - -WebMDemuxer::VIDEO_CODEC WebMDemuxer::getVideoCodec() const -{ - return m_vCodec; -} -int WebMDemuxer::getWidth() const -{ - return m_videoTrack->GetWidth(); -} -int WebMDemuxer::getHeight() const -{ - return m_videoTrack->GetHeight(); -} - -WebMDemuxer::AUDIO_CODEC WebMDemuxer::getAudioCodec() const -{ - return m_aCodec; -} -const unsigned char *WebMDemuxer::getAudioExtradata(size_t &size) const -{ - return m_audioTrack->GetCodecPrivate(size); -} -double WebMDemuxer::getSampleRate() const -{ - return m_audioTrack->GetSamplingRate(); -} -int WebMDemuxer::getChannels() const -{ - return m_audioTrack->GetChannels(); -} -int WebMDemuxer::getAudioDepth() const -{ - return m_audioTrack->GetBitDepth(); -} - -bool WebMDemuxer::readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame) -{ - const long videoTrackNumber = (videoFrame && m_videoTrack) ? m_videoTrack->GetNumber() : 0; - const long audioTrackNumber = (audioFrame && m_audioTrack) ? m_audioTrack->GetNumber() : 0; - bool blockEntryEOS = false; - - if (videoFrame) - videoFrame->bufferSize = 0; - if (audioFrame) - audioFrame->bufferSize = 0; - - if (videoTrackNumber == 0 && audioTrackNumber == 0) - return false; - - if (m_eos) - return false; - - if (!m_cluster) - m_cluster = m_segment->GetFirst(); - - do - { - bool getNewBlock = false; - long status = 0; - if (!m_blockEntry && !blockEntryEOS) - { - status = m_cluster->GetFirst(m_blockEntry); - getNewBlock = true; - } - else if (blockEntryEOS || m_blockEntry->EOS()) - { - m_cluster = m_segment->GetNext(m_cluster); - if (!m_cluster || m_cluster->EOS()) - { - m_eos = true; - return false; - } - status = m_cluster->GetFirst(m_blockEntry); - blockEntryEOS = false; - getNewBlock = true; - } - else if (!m_block || m_blockFrameIndex == m_block->GetFrameCount() || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber)) - { - status = m_cluster->GetNext(m_blockEntry, m_blockEntry); - if (!m_blockEntry || m_blockEntry->EOS()) - { - blockEntryEOS = true; - continue; - } - getNewBlock = true; - } - if (status || !m_blockEntry) - return false; - if (getNewBlock) - { - m_block = m_blockEntry->GetBlock(); - m_blockFrameIndex = 0; - } - } while (blockEntryEOS || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber)); - - WebMFrame *frame = NULL; - - const long trackNumber = m_block->GetTrackNumber(); - if (trackNumber == videoTrackNumber) - frame = videoFrame; - else if (trackNumber == audioTrackNumber) - frame = audioFrame; - else - { - //Should not be possible - assert(trackNumber == videoTrackNumber || trackNumber == audioTrackNumber); - return false; - } - - const mkvparser::Block::Frame &blockFrame = m_block->GetFrame(m_blockFrameIndex++); - if (blockFrame.len > frame->bufferCapacity) - { - unsigned char *newBuff = (unsigned char *)realloc(frame->buffer, frame->bufferCapacity = blockFrame.len); - if (newBuff) - frame->buffer = newBuff; - else // Out of memory - return false; - } - frame->bufferSize = blockFrame.len; - - frame->time = m_block->GetTime(m_cluster) / 1e9; - frame->key = m_block->IsKey(); - - return !blockFrame.Read(m_reader, frame->buffer); -} - -inline bool WebMDemuxer::notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const -{ - const long trackNumber = m_block->GetTrackNumber(); - return (trackNumber != videoTrackNumber && trackNumber != audioTrackNumber); -} diff --git a/thirdparty/libsimplewebm/WebMDemuxer.hpp b/thirdparty/libsimplewebm/WebMDemuxer.hpp deleted file mode 100644 index a45ddb3f26..0000000000 --- a/thirdparty/libsimplewebm/WebMDemuxer.hpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef WEBMDEMUXER_HPP -#define WEBMDEMUXER_HPP - -#include - -namespace mkvparser { - class IMkvReader; - class Segment; - class Cluster; - class Block; - class BlockEntry; - class VideoTrack; - class AudioTrack; -} - -class WebMFrame -{ - WebMFrame(const WebMFrame &); - void operator =(const WebMFrame &); -public: - WebMFrame(); - ~WebMFrame(); - - inline bool isValid() const - { - return bufferSize > 0; - } - - long bufferSize, bufferCapacity; - unsigned char *buffer; - double time; - bool key; -}; - -class WebMDemuxer -{ - WebMDemuxer(const WebMDemuxer &); - void operator =(const WebMDemuxer &); -public: - enum VIDEO_CODEC - { - NO_VIDEO, - VIDEO_VP8, - VIDEO_VP9 - }; - enum AUDIO_CODEC - { - NO_AUDIO, - AUDIO_VORBIS, - AUDIO_OPUS - }; - - WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack = 0, int audioTrack = 0); - ~WebMDemuxer(); - - inline bool isOpen() const - { - return m_isOpen; - } - inline bool isEOS() const - { - return m_eos; - } - - double getLength() const; - - VIDEO_CODEC getVideoCodec() const; - int getWidth() const; - int getHeight() const; - - AUDIO_CODEC getAudioCodec() const; - const unsigned char *getAudioExtradata(size_t &size) const; // Needed for Vorbis - double getSampleRate() const; - int getChannels() const; - int getAudioDepth() const; - - bool readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame); - -private: - inline bool notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const; - - mkvparser::IMkvReader *m_reader; - mkvparser::Segment *m_segment; - - const mkvparser::Cluster *m_cluster; - const mkvparser::Block *m_block; - const mkvparser::BlockEntry *m_blockEntry; - - int m_blockFrameIndex; - - const mkvparser::VideoTrack *m_videoTrack; - VIDEO_CODEC m_vCodec; - - const mkvparser::AudioTrack *m_audioTrack; - AUDIO_CODEC m_aCodec; - - bool m_isOpen; - bool m_eos; -}; - -#endif // WEBMDEMUXER_HPP diff --git a/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT b/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT deleted file mode 100644 index 9686ac13eb..0000000000 --- a/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT +++ /dev/null @@ -1,4 +0,0 @@ -# Names should be added to this file like so: -# Name or Organization - -Google Inc. diff --git a/thirdparty/libsimplewebm/libwebm/LICENSE.TXT b/thirdparty/libsimplewebm/libwebm/LICENSE.TXT deleted file mode 100644 index 7a6f99547d..0000000000 --- a/thirdparty/libsimplewebm/libwebm/LICENSE.TXT +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2010, Google Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/thirdparty/libsimplewebm/libwebm/PATENTS.TXT b/thirdparty/libsimplewebm/libwebm/PATENTS.TXT deleted file mode 100644 index caedf607e9..0000000000 --- a/thirdparty/libsimplewebm/libwebm/PATENTS.TXT +++ /dev/null @@ -1,23 +0,0 @@ -Additional IP Rights Grant (Patents) ------------------------------------- - -"These implementations" means the copyrightable works that implement the WebM -codecs distributed by Google as part of the WebM Project. - -Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, -royalty-free, irrevocable (except as stated in this section) patent license to -make, have made, use, offer to sell, sell, import, transfer, and otherwise -run, modify and propagate the contents of these implementations of WebM, where -such license applies only to those patent claims, both currently owned by -Google and acquired in the future, licensable by Google that are necessarily -infringed by these implementations of WebM. This grant does not include claims -that would be infringed only as a consequence of further modification of these -implementations. If you or your agent or exclusive licensee institute or order -or agree to the institution of patent litigation or any other patent -enforcement activity against any entity (including a cross-claim or -counterclaim in a lawsuit) alleging that any of these implementations of WebM -or any code incorporated within any of these implementations of WebM -constitute direct or contributory patent infringement, or inducement of -patent infringement, then any patent rights granted to you under this License -for these implementations of WebM shall terminate as of the date such -litigation is filed. diff --git a/thirdparty/libsimplewebm/libwebm/README.libvpx b/thirdparty/libsimplewebm/libwebm/README.libvpx deleted file mode 100644 index 1aa93b75aa..0000000000 --- a/thirdparty/libsimplewebm/libwebm/README.libvpx +++ /dev/null @@ -1,11 +0,0 @@ -URL: https://chromium.googlesource.com/webm/libwebm -Version: d7c62173ff6b4a5e0a2f86683a5b67db98cf09bf -License: BSD -License File: LICENSE.txt - -Description: -libwebm is used to handle WebM container I/O. - -Local Changes: -* Removed: "mkvmuxer", "hdr_util", "file_util", "mkv_reader". -* Make "~IMkvRerader()" public. diff --git a/thirdparty/libsimplewebm/libwebm/common/webmids.h b/thirdparty/libsimplewebm/libwebm/common/webmids.h deleted file mode 100644 index 89d722a71b..0000000000 --- a/thirdparty/libsimplewebm/libwebm/common/webmids.h +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - -#ifndef COMMON_WEBMIDS_H_ -#define COMMON_WEBMIDS_H_ - -namespace libwebm { - -enum MkvId { - kMkvEBML = 0x1A45DFA3, - kMkvEBMLVersion = 0x4286, - kMkvEBMLReadVersion = 0x42F7, - kMkvEBMLMaxIDLength = 0x42F2, - kMkvEBMLMaxSizeLength = 0x42F3, - kMkvDocType = 0x4282, - kMkvDocTypeVersion = 0x4287, - kMkvDocTypeReadVersion = 0x4285, - kMkvVoid = 0xEC, - kMkvSignatureSlot = 0x1B538667, - kMkvSignatureAlgo = 0x7E8A, - kMkvSignatureHash = 0x7E9A, - kMkvSignaturePublicKey = 0x7EA5, - kMkvSignature = 0x7EB5, - kMkvSignatureElements = 0x7E5B, - kMkvSignatureElementList = 0x7E7B, - kMkvSignedElement = 0x6532, - // segment - kMkvSegment = 0x18538067, - // Meta Seek Information - kMkvSeekHead = 0x114D9B74, - kMkvSeek = 0x4DBB, - kMkvSeekID = 0x53AB, - kMkvSeekPosition = 0x53AC, - // Segment Information - kMkvInfo = 0x1549A966, - kMkvTimecodeScale = 0x2AD7B1, - kMkvDuration = 0x4489, - kMkvDateUTC = 0x4461, - kMkvTitle = 0x7BA9, - kMkvMuxingApp = 0x4D80, - kMkvWritingApp = 0x5741, - // Cluster - kMkvCluster = 0x1F43B675, - kMkvTimecode = 0xE7, - kMkvPrevSize = 0xAB, - kMkvBlockGroup = 0xA0, - kMkvBlock = 0xA1, - kMkvBlockDuration = 0x9B, - kMkvReferenceBlock = 0xFB, - kMkvLaceNumber = 0xCC, - kMkvSimpleBlock = 0xA3, - kMkvBlockAdditions = 0x75A1, - kMkvBlockMore = 0xA6, - kMkvBlockAddID = 0xEE, - kMkvBlockAdditional = 0xA5, - kMkvDiscardPadding = 0x75A2, - // Track - kMkvTracks = 0x1654AE6B, - kMkvTrackEntry = 0xAE, - kMkvTrackNumber = 0xD7, - kMkvTrackUID = 0x73C5, - kMkvTrackType = 0x83, - kMkvFlagEnabled = 0xB9, - kMkvFlagDefault = 0x88, - kMkvFlagForced = 0x55AA, - kMkvFlagLacing = 0x9C, - kMkvDefaultDuration = 0x23E383, - kMkvMaxBlockAdditionID = 0x55EE, - kMkvName = 0x536E, - kMkvLanguage = 0x22B59C, - kMkvCodecID = 0x86, - kMkvCodecPrivate = 0x63A2, - kMkvCodecName = 0x258688, - kMkvCodecDelay = 0x56AA, - kMkvSeekPreRoll = 0x56BB, - // video - kMkvVideo = 0xE0, - kMkvFlagInterlaced = 0x9A, - kMkvStereoMode = 0x53B8, - kMkvAlphaMode = 0x53C0, - kMkvPixelWidth = 0xB0, - kMkvPixelHeight = 0xBA, - kMkvPixelCropBottom = 0x54AA, - kMkvPixelCropTop = 0x54BB, - kMkvPixelCropLeft = 0x54CC, - kMkvPixelCropRight = 0x54DD, - kMkvDisplayWidth = 0x54B0, - kMkvDisplayHeight = 0x54BA, - kMkvDisplayUnit = 0x54B2, - kMkvAspectRatioType = 0x54B3, - kMkvFrameRate = 0x2383E3, - // end video - // colour - kMkvColour = 0x55B0, - kMkvMatrixCoefficients = 0x55B1, - kMkvBitsPerChannel = 0x55B2, - kMkvChromaSubsamplingHorz = 0x55B3, - kMkvChromaSubsamplingVert = 0x55B4, - kMkvCbSubsamplingHorz = 0x55B5, - kMkvCbSubsamplingVert = 0x55B6, - kMkvChromaSitingHorz = 0x55B7, - kMkvChromaSitingVert = 0x55B8, - kMkvRange = 0x55B9, - kMkvTransferCharacteristics = 0x55BA, - kMkvPrimaries = 0x55BB, - kMkvMaxCLL = 0x55BC, - kMkvMaxFALL = 0x55BD, - // mastering metadata - kMkvMasteringMetadata = 0x55D0, - kMkvPrimaryRChromaticityX = 0x55D1, - kMkvPrimaryRChromaticityY = 0x55D2, - kMkvPrimaryGChromaticityX = 0x55D3, - kMkvPrimaryGChromaticityY = 0x55D4, - kMkvPrimaryBChromaticityX = 0x55D5, - kMkvPrimaryBChromaticityY = 0x55D6, - kMkvWhitePointChromaticityX = 0x55D7, - kMkvWhitePointChromaticityY = 0x55D8, - kMkvLuminanceMax = 0x55D9, - kMkvLuminanceMin = 0x55DA, - // end mastering metadata - // end colour - // projection - kMkvProjection = 0x7670, - kMkvProjectionType = 0x7671, - kMkvProjectionPrivate = 0x7672, - kMkvProjectionPoseYaw = 0x7673, - kMkvProjectionPosePitch = 0x7674, - kMkvProjectionPoseRoll = 0x7675, - // end projection - // audio - kMkvAudio = 0xE1, - kMkvSamplingFrequency = 0xB5, - kMkvOutputSamplingFrequency = 0x78B5, - kMkvChannels = 0x9F, - kMkvBitDepth = 0x6264, - // end audio - // ContentEncodings - kMkvContentEncodings = 0x6D80, - kMkvContentEncoding = 0x6240, - kMkvContentEncodingOrder = 0x5031, - kMkvContentEncodingScope = 0x5032, - kMkvContentEncodingType = 0x5033, - kMkvContentCompression = 0x5034, - kMkvContentCompAlgo = 0x4254, - kMkvContentCompSettings = 0x4255, - kMkvContentEncryption = 0x5035, - kMkvContentEncAlgo = 0x47E1, - kMkvContentEncKeyID = 0x47E2, - kMkvContentSignature = 0x47E3, - kMkvContentSigKeyID = 0x47E4, - kMkvContentSigAlgo = 0x47E5, - kMkvContentSigHashAlgo = 0x47E6, - kMkvContentEncAESSettings = 0x47E7, - kMkvAESSettingsCipherMode = 0x47E8, - kMkvAESSettingsCipherInitData = 0x47E9, - // end ContentEncodings - // Cueing Data - kMkvCues = 0x1C53BB6B, - kMkvCuePoint = 0xBB, - kMkvCueTime = 0xB3, - kMkvCueTrackPositions = 0xB7, - kMkvCueTrack = 0xF7, - kMkvCueClusterPosition = 0xF1, - kMkvCueBlockNumber = 0x5378, - // Chapters - kMkvChapters = 0x1043A770, - kMkvEditionEntry = 0x45B9, - kMkvChapterAtom = 0xB6, - kMkvChapterUID = 0x73C4, - kMkvChapterStringUID = 0x5654, - kMkvChapterTimeStart = 0x91, - kMkvChapterTimeEnd = 0x92, - kMkvChapterDisplay = 0x80, - kMkvChapString = 0x85, - kMkvChapLanguage = 0x437C, - kMkvChapCountry = 0x437E, - // Tags - kMkvTags = 0x1254C367, - kMkvTag = 0x7373, - kMkvSimpleTag = 0x67C8, - kMkvTagName = 0x45A3, - kMkvTagString = 0x4487 -}; - -} // namespace libwebm - -#endif // COMMON_WEBMIDS_H_ diff --git a/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h b/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h deleted file mode 100644 index e5db121605..0000000000 --- a/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - -#ifndef MKVMUXER_MKVMUXERTYPES_H_ -#define MKVMUXER_MKVMUXERTYPES_H_ - -namespace mkvmuxer { -typedef unsigned char uint8; -typedef short int16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -} // namespace mkvmuxer - -// Copied from Chromium basictypes.h -// A macro to disallow the copy constructor and operator= functions -// This should be used in the private: declarations for a class -#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) - -#endif // MKVMUXER_MKVMUXERTYPES_HPP_ diff --git a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc deleted file mode 100644 index e7b76f7da1..0000000000 --- a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc +++ /dev/null @@ -1,8049 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#include "mkvparser/mkvparser.h" - -#if defined(_MSC_VER) && _MSC_VER < 1800 -#include // _isnan() / _finite() -#define MSC_COMPAT -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include "common/webmids.h" - -namespace mkvparser { -const long long kStringElementSizeLimit = 20 * 1000 * 1000; -const float MasteringMetadata::kValueNotPresent = FLT_MAX; -const long long Colour::kValueNotPresent = LLONG_MAX; -const float Projection::kValueNotPresent = FLT_MAX; - -#ifdef MSC_COMPAT -inline bool isnan(double val) { return !!_isnan(val); } -inline bool isinf(double val) { return !_finite(val); } -#else -inline bool isnan(double val) { return std::isnan(val); } -inline bool isinf(double val) { return std::isinf(val); } -#endif // MSC_COMPAT - -IMkvReader::~IMkvReader() {} - -template -Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size) { - if (num_elements == 0 || element_size == 0) - return NULL; - - const size_t kMaxAllocSize = 0x80000000; // 2GiB - const unsigned long long num_bytes = num_elements * element_size; - if (element_size > (kMaxAllocSize / num_elements)) - return NULL; - if (num_bytes != static_cast(num_bytes)) - return NULL; - - return new (std::nothrow) Type[static_cast(num_bytes)]; -} - -void GetVersion(int& major, int& minor, int& build, int& revision) { - major = 1; - minor = 0; - build = 0; - revision = 30; -} - -long long ReadUInt(IMkvReader* pReader, long long pos, long& len) { - if (!pReader || pos < 0) - return E_FILE_FORMAT_INVALID; - - len = 1; - unsigned char b; - int status = pReader->Read(pos, 1, &b); - - if (status < 0) // error or underflow - return status; - - if (status > 0) // interpreted as "underflow" - return E_BUFFER_NOT_FULL; - - if (b == 0) // we can't handle u-int values larger than 8 bytes - return E_FILE_FORMAT_INVALID; - - unsigned char m = 0x80; - - while (!(b & m)) { - m >>= 1; - ++len; - } - - long long result = b & (~m); - ++pos; - - for (int i = 1; i < len; ++i) { - status = pReader->Read(pos, 1, &b); - - if (status < 0) { - len = 1; - return status; - } - - if (status > 0) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result <<= 8; - result |= b; - - ++pos; - } - - return result; -} - -// Reads an EBML ID and returns it. -// An ID must at least 1 byte long, cannot exceed 4, and its value must be -// greater than 0. -// See known EBML values and EBMLMaxIDLength: -// http://www.matroska.org/technical/specs/index.html -// Returns the ID, or a value less than 0 to report an error while reading the -// ID. -long long ReadID(IMkvReader* pReader, long long pos, long& len) { - if (pReader == NULL || pos < 0) - return E_FILE_FORMAT_INVALID; - - // Read the first byte. The length in bytes of the ID is determined by - // finding the first set bit in the first byte of the ID. - unsigned char temp_byte = 0; - int read_status = pReader->Read(pos, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) // No data to read. - return E_BUFFER_NOT_FULL; - - if (temp_byte == 0) // ID length > 8 bytes; invalid file. - return E_FILE_FORMAT_INVALID; - - int bit_pos = 0; - const int kMaxIdLengthInBytes = 4; - const int kCheckByte = 0x80; - - // Find the first bit that's set. - bool found_bit = false; - for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) { - if ((kCheckByte >> bit_pos) & temp_byte) { - found_bit = true; - break; - } - } - - if (!found_bit) { - // The value is too large to be a valid ID. - return E_FILE_FORMAT_INVALID; - } - - // Read the remaining bytes of the ID (if any). - const int id_length = bit_pos + 1; - long long ebml_id = temp_byte; - for (int i = 1; i < id_length; ++i) { - ebml_id <<= 8; - read_status = pReader->Read(pos + i, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) - return E_BUFFER_NOT_FULL; - - ebml_id |= temp_byte; - } - - len = id_length; - return ebml_id; -} - -long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) { - if (!pReader || pos < 0) - return E_FILE_FORMAT_INVALID; - - long long total, available; - - int status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return E_FILE_FORMAT_INVALID; - - len = 1; - - if (pos >= available) - return pos; // too few bytes available - - unsigned char b; - - status = pReader->Read(pos, 1, &b); - - if (status != 0) - return status; - - if (b == 0) // we can't handle u-int values larger than 8 bytes - return E_FILE_FORMAT_INVALID; - - unsigned char m = 0x80; - - while (!(b & m)) { - m >>= 1; - ++len; - } - - return 0; // success -} - -// TODO(vigneshv): This function assumes that unsigned values never have their -// high bit set. -long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) { - if (!pReader || pos < 0 || (size <= 0) || (size > 8)) - return E_FILE_FORMAT_INVALID; - - long long result = 0; - - for (long long i = 0; i < size; ++i) { - unsigned char b; - - const long status = pReader->Read(pos, 1, &b); - - if (status < 0) - return status; - - result <<= 8; - result |= b; - - ++pos; - } - - return result; -} - -long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_, - double& result) { - if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8))) - return E_FILE_FORMAT_INVALID; - - const long size = static_cast(size_); - - unsigned char buf[8]; - - const int status = pReader->Read(pos, size, buf); - - if (status < 0) // error - return status; - - if (size == 4) { - union { - float f; - unsigned long ff; - }; - - ff = 0; - - for (int i = 0;;) { - ff |= buf[i]; - - if (++i >= 4) - break; - - ff <<= 8; - } - - result = f; - } else { - union { - double d; - unsigned long long dd; - }; - - dd = 0; - - for (int i = 0;;) { - dd |= buf[i]; - - if (++i >= 8) - break; - - dd <<= 8; - } - - result = d; - } - - if (mkvparser::isinf(result) || mkvparser::isnan(result)) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long UnserializeInt(IMkvReader* pReader, long long pos, long long size, - long long& result_ref) { - if (!pReader || pos < 0 || size < 1 || size > 8) - return E_FILE_FORMAT_INVALID; - - signed char first_byte = 0; - const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte); - - if (status < 0) - return status; - - unsigned long long result = first_byte; - ++pos; - - for (long i = 1; i < size; ++i) { - unsigned char b; - - const long status = pReader->Read(pos, 1, &b); - - if (status < 0) - return status; - - result <<= 8; - result |= b; - - ++pos; - } - - result_ref = static_cast(result); - return 0; -} - -long UnserializeString(IMkvReader* pReader, long long pos, long long size, - char*& str) { - delete[] str; - str = NULL; - - if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit) - return E_FILE_FORMAT_INVALID; - - // +1 for '\0' terminator - const long required_size = static_cast(size) + 1; - - str = SafeArrayAlloc(1, required_size); - if (str == NULL) - return E_FILE_FORMAT_INVALID; - - unsigned char* const buf = reinterpret_cast(str); - - const long status = pReader->Read(pos, static_cast(size), buf); - - if (status) { - delete[] str; - str = NULL; - - return status; - } - - str[required_size - 1] = '\0'; - return 0; -} - -long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop, - long long& id, long long& size) { - if (stop >= 0 && pos >= stop) - return E_FILE_FORMAT_INVALID; - - long len; - - id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume id - - if (stop >= 0 && pos >= stop) - return E_FILE_FORMAT_INVALID; - - size = ReadUInt(pReader, pos, len); - - if (size < 0 || len < 1 || len > 8) { - // Invalid: Negative payload size, negative or 0 length integer, or integer - // larger than 64 bits (libwebm cannot handle them). - return E_FILE_FORMAT_INVALID; - } - - // Avoid rolling over pos when very close to LLONG_MAX. - const unsigned long long rollover_check = - static_cast(pos) + len; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size - - // pos now designates payload - - if (stop >= 0 && pos > stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, - long long& val) { - if (!pReader || pos < 0) - return false; - - long long total = 0; - long long available = 0; - - const long status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return false; - - long len = 0; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (available - pos) > len) - return false; - - if (static_cast(id) != expected_id) - return false; - - pos += len; // consume id - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len) - return false; - - pos += len; // consume length of size of payload - - val = UnserializeUInt(pReader, pos, size); - if (val < 0) - return false; - - pos += size; // consume size of payload - - return true; -} - -bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, - unsigned char*& buf, size_t& buflen) { - if (!pReader || pos < 0) - return false; - - long long total = 0; - long long available = 0; - - long status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return false; - - long len = 0; - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (available - pos) > len) - return false; - - if (static_cast(id) != expected_id) - return false; - - pos += len; // consume id - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || len <= 0 || len > 8 || (available - pos) > len) - return false; - - unsigned long long rollover_check = - static_cast(pos) + len; - if (rollover_check > LLONG_MAX) - return false; - - pos += len; // consume length of size of payload - - rollover_check = static_cast(pos) + size; - if (rollover_check > LLONG_MAX) - return false; - - if ((pos + size) > available) - return false; - - if (size >= LONG_MAX) - return false; - - const long buflen_ = static_cast(size); - - buf = SafeArrayAlloc(1, buflen_); - if (!buf) - return false; - - status = pReader->Read(pos, buflen_, buf); - if (status != 0) - return false; - - buflen = buflen_; - - pos += size; // consume size of payload - return true; -} - -EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); } - -EBMLHeader::~EBMLHeader() { delete[] m_docType; } - -void EBMLHeader::Init() { - m_version = 1; - m_readVersion = 1; - m_maxIdLength = 4; - m_maxSizeLength = 8; - - if (m_docType) { - delete[] m_docType; - m_docType = NULL; - } - - m_docTypeVersion = 1; - m_docTypeReadVersion = 1; -} - -long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { - if (!pReader) - return E_FILE_FORMAT_INVALID; - - long long total, available; - - long status = pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - pos = 0; - - // Scan until we find what looks like the first byte of the EBML header. - const long long kMaxScanBytes = (available >= 1024) ? 1024 : available; - const unsigned char kEbmlByte0 = 0x1A; - unsigned char scan_byte = 0; - - while (pos < kMaxScanBytes) { - status = pReader->Read(pos, 1, &scan_byte); - - if (status < 0) // error - return status; - else if (status > 0) - return E_BUFFER_NOT_FULL; - - if (scan_byte == kEbmlByte0) - break; - - ++pos; - } - - long len = 0; - const long long ebml_id = ReadID(pReader, pos, len); - - if (ebml_id == E_BUFFER_NOT_FULL) - return E_BUFFER_NOT_FULL; - - if (len != 4 || ebml_id != libwebm::kMkvEBML) - return E_FILE_FORMAT_INVALID; - - // Move read pos forward to the EBML header size field. - pos += 4; - - // Read length of size field. - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return E_FILE_FORMAT_INVALID; - else if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if (len < 1 || len > 8) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((total - pos) < len)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < len) - return pos + len; // try again later - - // Read the EBML header size. - result = ReadUInt(pReader, pos, len); - - if (result < 0) // error - return result; - - pos += len; // consume size field - - // pos now designates start of payload - - if ((total >= 0) && ((total - pos) < result)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < result) - return pos + result; - - const long long end = pos + result; - - Init(); - - while (pos < end) { - long long id, size; - - status = ParseElementHeader(pReader, pos, end, id, size); - - if (status < 0) // error - return status; - - if (size == 0) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvEBMLVersion) { - m_version = UnserializeUInt(pReader, pos, size); - - if (m_version <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLReadVersion) { - m_readVersion = UnserializeUInt(pReader, pos, size); - - if (m_readVersion <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLMaxIDLength) { - m_maxIdLength = UnserializeUInt(pReader, pos, size); - - if (m_maxIdLength <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLMaxSizeLength) { - m_maxSizeLength = UnserializeUInt(pReader, pos, size); - - if (m_maxSizeLength <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDocType) { - if (m_docType) - return E_FILE_FORMAT_INVALID; - - status = UnserializeString(pReader, pos, size, m_docType); - - if (status) // error - return status; - } else if (id == libwebm::kMkvDocTypeVersion) { - m_docTypeVersion = UnserializeUInt(pReader, pos, size); - - if (m_docTypeVersion <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDocTypeReadVersion) { - m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); - - if (m_docTypeReadVersion <= 0) - return E_FILE_FORMAT_INVALID; - } - - pos += size; - } - - if (pos != end) - return E_FILE_FORMAT_INVALID; - - // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid. - if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0) - return E_FILE_FORMAT_INVALID; - - // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. - if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 || - m_maxSizeLength > 8) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -Segment::Segment(IMkvReader* pReader, long long elem_start, - // long long elem_size, - long long start, long long size) - : m_pReader(pReader), - m_element_start(elem_start), - // m_element_size(elem_size), - m_start(start), - m_size(size), - m_pos(start), - m_pUnknownSize(0), - m_pSeekHead(NULL), - m_pInfo(NULL), - m_pTracks(NULL), - m_pCues(NULL), - m_pChapters(NULL), - m_pTags(NULL), - m_clusters(NULL), - m_clusterCount(0), - m_clusterPreloadCount(0), - m_clusterSize(0) {} - -Segment::~Segment() { - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** i = m_clusters; - Cluster** j = m_clusters + count; - - while (i != j) { - Cluster* const p = *i++; - delete p; - } - - delete[] m_clusters; - - delete m_pTracks; - delete m_pInfo; - delete m_pCues; - delete m_pChapters; - delete m_pTags; - delete m_pSeekHead; -} - -long long Segment::CreateInstance(IMkvReader* pReader, long long pos, - Segment*& pSegment) { - if (pReader == NULL || pos < 0) - return E_PARSE_FAILED; - - pSegment = NULL; - - long long total, available; - - const long status = pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - if (available < 0) - return -1; - - if ((total >= 0) && (available > total)) - return -1; - - // I would assume that in practice this loop would execute - // exactly once, but we allow for other elements (e.g. Void) - // to immediately follow the EBML header. This is fine for - // the source filter case (since the entire file is available), - // but in the splitter case over a network we should probably - // just give up early. We could for example decide only to - // execute this loop a maximum of, say, 10 times. - // TODO: - // There is an implied "give up early" by only parsing up - // to the available limit. We do do that, but only if the - // total file size is unknown. We could decide to always - // use what's available as our limit (irrespective of whether - // we happen to know the total file length). This would have - // as its sense "parse this much of the file before giving up", - // which a slightly different sense from "try to parse up to - // 10 EMBL elements before giving up". - - for (;;) { - if ((total >= 0) && (pos >= total)) - return E_FILE_FORMAT_INVALID; - - // Read ID - long len; - long long result = GetUIntLength(pReader, pos, len); - - if (result) // error, or too few available bytes - return result; - - if ((total >= 0) && ((pos + len) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long idpos = pos; - const long long id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - // Read Size - - result = GetUIntLength(pReader, pos, len); - - if (result) // error, or too few available bytes - return result; - - if ((total >= 0) && ((pos + len) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return size; - - pos += len; // consume length of size of element - - // Pos now points to start of payload - - // Handle "unknown size" for live streaming of webm files. - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (id == libwebm::kMkvSegment) { - if (size == unknown_size) - size = -1; - - else if (total < 0) - size = -1; - - else if ((pos + size) > total) - size = -1; - - pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size); - if (pSegment == NULL) - return E_PARSE_FAILED; - - return 0; // success - } - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + size) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + size) > available) - return pos + size; - - pos += size; // consume payload - } -} - -long long Segment::ParseHeaders() { - // Outermost (level 0) segment object has been constructed, - // and pos designates start of payload. We need to find the - // inner (level 1) elements. - long long total, available; - - const int status = m_pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - if (total > 0 && available > total) - return E_FILE_FORMAT_INVALID; - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - if ((segment_stop >= 0 && total >= 0 && segment_stop > total) || - (segment_stop >= 0 && m_pos > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - for (;;) { - if ((total >= 0) && (m_pos >= total)) - break; - - if ((segment_stop >= 0) && (m_pos >= segment_stop)) - break; - - long long pos = m_pos; - const long long element_start = pos; - - // Avoid rolling over pos when very close to LLONG_MAX. - unsigned long long rollover_check = pos + 1ULL; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > available) - return (pos + 1); - - long len; - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return result; - - if (result > 0) { - // MkvReader doesn't have enough data to satisfy this read attempt. - return (pos + 1); - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvCluster) - break; - - pos += len; // consume ID - - if ((pos + 1) > available) - return (pos + 1); - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return result; - - if (result > 0) { - // MkvReader doesn't have enough data to satisfy this read attempt. - return (pos + 1); - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0 || len < 1 || len > 8) { - // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or - // len > 8 is true instead of checking this _everywhere_. - return size; - } - - pos += len; // consume length of size of element - - // Avoid rolling over pos when very close to LLONG_MAX. - rollover_check = static_cast(pos) + size; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - const long long element_size = size + pos - element_start; - - // Pos now points to start of payload - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - // We read EBML elements either in total or nothing at all. - - if ((pos + size) > available) - return pos + size; - - if (id == libwebm::kMkvInfo) { - if (m_pInfo) - return E_FILE_FORMAT_INVALID; - - m_pInfo = new (std::nothrow) - SegmentInfo(this, pos, size, element_start, element_size); - - if (m_pInfo == NULL) - return -1; - - const long status = m_pInfo->Parse(); - - if (status) - return status; - } else if (id == libwebm::kMkvTracks) { - if (m_pTracks) - return E_FILE_FORMAT_INVALID; - - m_pTracks = new (std::nothrow) - Tracks(this, pos, size, element_start, element_size); - - if (m_pTracks == NULL) - return -1; - - const long status = m_pTracks->Parse(); - - if (status) - return status; - } else if (id == libwebm::kMkvCues) { - if (m_pCues == NULL) { - m_pCues = new (std::nothrow) - Cues(this, pos, size, element_start, element_size); - - if (m_pCues == NULL) - return -1; - } - } else if (id == libwebm::kMkvSeekHead) { - if (m_pSeekHead == NULL) { - m_pSeekHead = new (std::nothrow) - SeekHead(this, pos, size, element_start, element_size); - - if (m_pSeekHead == NULL) - return -1; - - const long status = m_pSeekHead->Parse(); - - if (status) - return status; - } - } else if (id == libwebm::kMkvChapters) { - if (m_pChapters == NULL) { - m_pChapters = new (std::nothrow) - Chapters(this, pos, size, element_start, element_size); - - if (m_pChapters == NULL) - return -1; - - const long status = m_pChapters->Parse(); - - if (status) - return status; - } - } else if (id == libwebm::kMkvTags) { - if (m_pTags == NULL) { - m_pTags = new (std::nothrow) - Tags(this, pos, size, element_start, element_size); - - if (m_pTags == NULL) - return -1; - - const long status = m_pTags->Parse(); - - if (status) - return status; - } - } - - m_pos = pos + size; // consume payload - } - - if (segment_stop >= 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - if (m_pInfo == NULL) // TODO: liberalize this behavior - return E_FILE_FORMAT_INVALID; - - if (m_pTracks == NULL) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -long Segment::LoadCluster(long long& pos, long& len) { - for (;;) { - const long result = DoLoadCluster(pos, len); - - if (result <= 1) - return result; - } -} - -long Segment::DoLoadCluster(long long& pos, long& len) { - if (m_pos < 0) - return DoLoadClusterUnknownSize(pos, len); - - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && avail > total) - return E_FILE_FORMAT_INVALID; - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - long long cluster_off = -1; // offset relative to start of segment - long long cluster_size = -1; // size of cluster payload - - for (;;) { - if ((total >= 0) && (m_pos >= total)) - return 1; // no more clusters - - if ((segment_stop >= 0) && (m_pos >= segment_stop)) - return 1; // no more clusters - - pos = m_pos; - - // Read ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume length of size of element - - // pos now points to start of payload - - if (size == 0) { - // Missing element payload: move on. - m_pos = pos; - continue; - } - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if ((segment_stop >= 0) && (size != unknown_size) && - ((pos + size) > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - if (id == libwebm::kMkvCues) { - if (size == unknown_size) { - // Cues element of unknown size: Not supported. - return E_FILE_FORMAT_INVALID; - } - - if (m_pCues == NULL) { - const long long element_size = (pos - idpos) + size; - - m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size); - if (m_pCues == NULL) - return -1; - } - - m_pos = pos + size; // consume payload - continue; - } - - if (id != libwebm::kMkvCluster) { - // Besides the Segment, Libwebm allows only cluster elements of unknown - // size. Fail the parse upon encountering a non-cluster element reporting - // unknown size. - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - m_pos = pos + size; // consume payload - continue; - } - - // We have a cluster. - - cluster_off = idpos - m_start; // relative pos - - if (size != unknown_size) - cluster_size = size; - - break; - } - - if (cluster_off < 0) { - // No cluster, die. - return E_FILE_FORMAT_INVALID; - } - - long long pos_; - long len_; - - status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); - - if (status < 0) { // error, or underflow - pos = pos_; - len = len_; - - return status; - } - - // status == 0 means "no block entries found" - // status > 0 means "found at least one block entry" - - // TODO: - // The issue here is that the segment increments its own - // pos ptr past the most recent cluster parsed, and then - // starts from there to parse the next cluster. If we - // don't know the size of the current cluster, then we - // must either parse its payload (as we do below), looking - // for the cluster (or cues) ID to terminate the parse. - // This isn't really what we want: rather, we really need - // a way to create the curr cluster object immediately. - // The pity is that cluster::parse can determine its own - // boundary, and we largely duplicate that same logic here. - // - // Maybe we need to get rid of our look-ahead preloading - // in source::parse??? - // - // As we're parsing the blocks in the curr cluster - //(in cluster::parse), we should have some way to signal - // to the segment that we have determined the boundary, - // so it can adjust its own segment::m_pos member. - // - // The problem is that we're asserting in asyncreadinit, - // because we adjust the pos down to the curr seek pos, - // and the resulting adjusted len is > 2GB. I'm suspicious - // that this is even correct, but even if it is, we can't - // be loading that much data in the cache anyway. - - const long idx = m_clusterCount; - - if (m_clusterPreloadCount > 0) { - if (idx >= m_clusterSize) - return E_FILE_FORMAT_INVALID; - - Cluster* const pCluster = m_clusters[idx]; - if (pCluster == NULL || pCluster->m_index >= 0) - return E_FILE_FORMAT_INVALID; - - const long long off = pCluster->GetPosition(); - if (off < 0) - return E_FILE_FORMAT_INVALID; - - if (off == cluster_off) { // preloaded already - if (status == 0) // no entries found - return E_FILE_FORMAT_INVALID; - - if (cluster_size >= 0) - pos += cluster_size; - else { - const long long element_size = pCluster->GetElementSize(); - - if (element_size <= 0) - return E_FILE_FORMAT_INVALID; // TODO: handle this case - - pos = pCluster->m_element_start + element_size; - } - - pCluster->m_index = idx; // move from preloaded to loaded - ++m_clusterCount; - --m_clusterPreloadCount; - - m_pos = pos; // consume payload - if (segment_stop >= 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success - } - } - - if (status == 0) { // no entries found - if (cluster_size >= 0) - pos += cluster_size; - - if ((total >= 0) && (pos >= total)) { - m_pos = total; - return 1; // no more clusters - } - - if ((segment_stop >= 0) && (pos >= segment_stop)) { - m_pos = segment_stop; - return 1; // no more clusters - } - - m_pos = pos; - return 2; // try again - } - - // status > 0 means we have an entry - - Cluster* const pCluster = Cluster::Create(this, idx, cluster_off); - if (pCluster == NULL) - return -1; - - if (!AppendCluster(pCluster)) { - delete pCluster; - return -1; - } - - if (cluster_size >= 0) { - pos += cluster_size; - - m_pos = pos; - - if (segment_stop > 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 0; - } - - m_pUnknownSize = pCluster; - m_pos = -pos; - - return 0; // partial success, since we have a new cluster - - // status == 0 means "no block entries found" - // pos designates start of payload - // m_pos has NOT been adjusted yet (in case we need to come back here) -} - -long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { - if (m_pos >= 0 || m_pUnknownSize == NULL) - return E_PARSE_FAILED; - - const long status = m_pUnknownSize->Parse(pos, len); - - if (status < 0) // error or underflow - return status; - - if (status == 0) // parsed a block - return 2; // continue parsing - - const long long start = m_pUnknownSize->m_element_start; - const long long size = m_pUnknownSize->GetElementSize(); - - if (size < 0) - return E_FILE_FORMAT_INVALID; - - pos = start + size; - m_pos = pos; - - m_pUnknownSize = 0; - - return 2; // continue parsing -} - -bool Segment::AppendCluster(Cluster* pCluster) { - if (pCluster == NULL || pCluster->m_index < 0) - return false; - - const long count = m_clusterCount + m_clusterPreloadCount; - - long& size = m_clusterSize; - const long idx = pCluster->m_index; - - if (size < count || idx != m_clusterCount) - return false; - - if (count >= size) { - const long n = (size <= 0) ? 2048 : 2 * size; - - Cluster** const qq = new (std::nothrow) Cluster*[n]; - if (qq == NULL) - return false; - - Cluster** q = qq; - Cluster** p = m_clusters; - Cluster** const pp = p + count; - - while (p != pp) - *q++ = *p++; - - delete[] m_clusters; - - m_clusters = qq; - size = n; - } - - if (m_clusterPreloadCount > 0) { - Cluster** const p = m_clusters + m_clusterCount; - if (*p == NULL || (*p)->m_index >= 0) - return false; - - Cluster** q = p + m_clusterPreloadCount; - if (q >= (m_clusters + size)) - return false; - - for (;;) { - Cluster** const qq = q - 1; - if ((*qq)->m_index >= 0) - return false; - - *q = *qq; - q = qq; - - if (q == p) - break; - } - } - - m_clusters[idx] = pCluster; - ++m_clusterCount; - return true; -} - -bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { - if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount) - return false; - - const long count = m_clusterCount + m_clusterPreloadCount; - - long& size = m_clusterSize; - if (size < count) - return false; - - if (count >= size) { - const long n = (size <= 0) ? 2048 : 2 * size; - - Cluster** const qq = new (std::nothrow) Cluster*[n]; - if (qq == NULL) - return false; - Cluster** q = qq; - - Cluster** p = m_clusters; - Cluster** const pp = p + count; - - while (p != pp) - *q++ = *p++; - - delete[] m_clusters; - - m_clusters = qq; - size = n; - } - - if (m_clusters == NULL) - return false; - - Cluster** const p = m_clusters + idx; - - Cluster** q = m_clusters + count; - if (q < p || q >= (m_clusters + size)) - return false; - - while (q > p) { - Cluster** const qq = q - 1; - - if ((*qq)->m_index >= 0) - return false; - - *q = *qq; - q = qq; - } - - m_clusters[idx] = pCluster; - ++m_clusterPreloadCount; - return true; -} - -long Segment::Load() { - if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0) - return E_PARSE_FAILED; - - // Outermost (level 0) segment object has been constructed, - // and pos designates start of payload. We need to find the - // inner (level 1) elements. - - const long long header_status = ParseHeaders(); - - if (header_status < 0) // error - return static_cast(header_status); - - if (header_status > 0) // underflow - return E_BUFFER_NOT_FULL; - - if (m_pInfo == NULL || m_pTracks == NULL) - return E_FILE_FORMAT_INVALID; - - for (;;) { - const long status = LoadCluster(); - - if (status < 0) // error - return status; - - if (status >= 1) // no more clusters - return 0; - } -} - -SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {} - -SeekHead::SeekHead(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_entries(0), - m_entry_count(0), - m_void_elements(0), - m_void_element_count(0) {} - -SeekHead::~SeekHead() { - delete[] m_entries; - delete[] m_void_elements; -} - -long SeekHead::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; - const long long stop = m_start + m_size; - - // first count the seek head entries - - int entry_count = 0; - int void_element_count = 0; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSeek) - ++entry_count; - else if (id == libwebm::kMkvVoid) - ++void_element_count; - - pos += size; // consume payload - - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - if (entry_count > 0) { - m_entries = new (std::nothrow) Entry[entry_count]; - - if (m_entries == NULL) - return -1; - } - - if (void_element_count > 0) { - m_void_elements = new (std::nothrow) VoidElement[void_element_count]; - - if (m_void_elements == NULL) - return -1; - } - - // now parse the entries and void elements - - Entry* pEntry = m_entries; - VoidElement* pVoidElement = m_void_elements; - - pos = m_start; - - while (pos < stop) { - const long long idpos = pos; - - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSeek && entry_count > 0) { - if (ParseEntry(pReader, pos, size, pEntry)) { - Entry& e = *pEntry++; - - e.element_start = idpos; - e.element_size = (pos + size) - idpos; - } - } else if (id == libwebm::kMkvVoid && void_element_count > 0) { - VoidElement& e = *pVoidElement++; - - e.element_start = idpos; - e.element_size = (pos + size) - idpos; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); - assert(count_ >= 0); - assert(count_ <= entry_count); - - m_entry_count = static_cast(count_); - - count_ = ptrdiff_t(pVoidElement - m_void_elements); - assert(count_ >= 0); - assert(count_ <= void_element_count); - - m_void_element_count = static_cast(count_); - - return 0; -} - -int SeekHead::GetCount() const { return m_entry_count; } - -const SeekHead::Entry* SeekHead::GetEntry(int idx) const { - if (idx < 0) - return 0; - - if (idx >= m_entry_count) - return 0; - - return m_entries + idx; -} - -int SeekHead::GetVoidElementCount() const { return m_void_element_count; } - -const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const { - if (idx < 0) - return 0; - - if (idx >= m_void_element_count) - return 0; - - return m_void_elements + idx; -} - -long Segment::ParseCues(long long off, long long& pos, long& len) { - if (m_pCues) - return 0; // success - - if (off < 0) - return -1; - - long long total, avail; - - const int status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - pos = m_start + off; - - if ((total < 0) || (pos >= total)) - return 1; // don't bother parsing cues - - const long long element_start = pos; - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // underflow (weird) - { - len = 1; - return E_BUFFER_NOT_FULL; - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - - const long long id = ReadID(m_pReader, idpos, len); - - if (id != libwebm::kMkvCues) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - assert((segment_stop < 0) || (pos <= segment_stop)); - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // underflow (weird) - { - len = 1; - return E_BUFFER_NOT_FULL; - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - if (size == 0) // weird, although technically not illegal - return 1; // done - - pos += len; // consume length of size of element - assert((segment_stop < 0) || (pos <= segment_stop)); - - // Pos now points to start of payload - - const long long element_stop = pos + size; - - if ((segment_stop >= 0) && (element_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (element_stop > total)) - return 1; // don't bother parsing anymore - - len = static_cast(size); - - if (element_stop > avail) - return E_BUFFER_NOT_FULL; - - const long long element_size = element_stop - element_start; - - m_pCues = - new (std::nothrow) Cues(this, pos, size, element_start, element_size); - if (m_pCues == NULL) - return -1; - - return 0; // success -} - -bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, - Entry* pEntry) { - if (size_ <= 0) - return false; - - long long pos = start; - const long long stop = start + size_; - - long len; - - // parse the container for the level-1 element ID - - const long long seekIdId = ReadID(pReader, pos, len); - if (seekIdId < 0) - return false; - - if (seekIdId != libwebm::kMkvSeekID) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume SeekID id - - const long long seekIdSize = ReadUInt(pReader, pos, len); - - if (seekIdSize <= 0) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume size of field - - if ((pos + seekIdSize) > stop) - return false; - - pEntry->id = ReadID(pReader, pos, len); // payload - - if (pEntry->id <= 0) - return false; - - if (len != seekIdSize) - return false; - - pos += seekIdSize; // consume SeekID payload - - const long long seekPosId = ReadID(pReader, pos, len); - - if (seekPosId != libwebm::kMkvSeekPosition) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume id - - const long long seekPosSize = ReadUInt(pReader, pos, len); - - if (seekPosSize <= 0) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume size - - if ((pos + seekPosSize) > stop) - return false; - - pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); - - if (pEntry->pos < 0) - return false; - - pos += seekPosSize; // consume payload - - if (pos != stop) - return false; - - return true; -} - -Cues::Cues(Segment* pSegment, long long start_, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start_), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_cue_points(NULL), - m_count(0), - m_preload_count(0), - m_pos(start_) {} - -Cues::~Cues() { - const long n = m_count + m_preload_count; - - CuePoint** p = m_cue_points; - CuePoint** const q = p + n; - - while (p != q) { - CuePoint* const pCP = *p++; - assert(pCP); - - delete pCP; - } - - delete[] m_cue_points; -} - -long Cues::GetCount() const { - if (m_cue_points == NULL) - return -1; - - return m_count; // TODO: really ignore preload count? -} - -bool Cues::DoneParsing() const { - const long long stop = m_start + m_size; - return (m_pos >= stop); -} - -bool Cues::Init() const { - if (m_cue_points) - return true; - - if (m_count != 0 || m_preload_count != 0) - return false; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - const long long stop = m_start + m_size; - long long pos = m_start; - - long cue_points_size = 0; - - while (pos < stop) { - const long long idpos = pos; - - long len; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || (pos + len > stop)) { - return false; - } - - pos += len; // consume Size field - if (pos + size > stop) { - return false; - } - - if (id == libwebm::kMkvCuePoint) { - if (!PreloadCuePoint(cue_points_size, idpos)) - return false; - } - - pos += size; // skip payload - } - return true; -} - -bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { - if (m_count != 0) - return false; - - if (m_preload_count >= cue_points_size) { - const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size; - - CuePoint** const qq = new (std::nothrow) CuePoint*[n]; - if (qq == NULL) - return false; - - CuePoint** q = qq; // beginning of target - - CuePoint** p = m_cue_points; // beginning of source - CuePoint** const pp = p + m_preload_count; // end of source - - while (p != pp) - *q++ = *p++; - - delete[] m_cue_points; - - m_cue_points = qq; - cue_points_size = n; - } - - CuePoint* const pCP = new (std::nothrow) CuePoint(m_preload_count, pos); - if (pCP == NULL) - return false; - - m_cue_points[m_preload_count++] = pCP; - return true; -} - -bool Cues::LoadCuePoint() const { - const long long stop = m_start + m_size; - - if (m_pos >= stop) - return false; // nothing else to do - - if (!Init()) { - m_pos = stop; - return false; - } - - IMkvReader* const pReader = m_pSegment->m_pReader; - - while (m_pos < stop) { - const long long idpos = m_pos; - - long len; - - const long long id = ReadID(pReader, m_pos, len); - if (id < 0 || (m_pos + len) > stop) - return false; - - m_pos += len; // consume ID - - const long long size = ReadUInt(pReader, m_pos, len); - if (size < 0 || (m_pos + len) > stop) - return false; - - m_pos += len; // consume Size field - if ((m_pos + size) > stop) - return false; - - if (id != libwebm::kMkvCuePoint) { - m_pos += size; // consume payload - if (m_pos > stop) - return false; - - continue; - } - - if (m_preload_count < 1) - return false; - - CuePoint* const pCP = m_cue_points[m_count]; - if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))) - return false; - - if (!pCP->Load(pReader)) { - m_pos = stop; - return false; - } - ++m_count; - --m_preload_count; - - m_pos += size; // consume payload - if (m_pos > stop) - return false; - - return true; // yes, we loaded a cue point - } - - return false; // no, we did not load a cue point -} - -bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, - const CuePoint::TrackPosition*& pTP) const { - if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0) - return false; - - CuePoint** const ii = m_cue_points; - CuePoint** i = ii; - - CuePoint** const jj = ii + m_count; - CuePoint** j = jj; - - pCP = *i; - if (pCP == NULL) - return false; - - if (time_ns <= pCP->GetTime(m_pSegment)) { - pTP = pCP->Find(pTrack); - return (pTP != NULL); - } - - while (i < j) { - // INVARIANT: - //[ii, i) <= time_ns - //[i, j) ? - //[j, jj) > time_ns - - CuePoint** const k = i + (j - i) / 2; - if (k >= jj) - return false; - - CuePoint* const pCP = *k; - if (pCP == NULL) - return false; - - const long long t = pCP->GetTime(m_pSegment); - - if (t <= time_ns) - i = k + 1; - else - j = k; - - if (i > j) - return false; - } - - if (i != j || i > jj || i <= ii) - return false; - - pCP = *--i; - - if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns) - return false; - - // TODO: here and elsewhere, it's probably not correct to search - // for the cue point with this time, and then search for a matching - // track. In principle, the matching track could be on some earlier - // cue point, and with our current algorithm, we'd miss it. To make - // this bullet-proof, we'd need to create a secondary structure, - // with a list of cue points that apply to a track, and then search - // that track-based structure for a matching cue point. - - pTP = pCP->Find(pTrack); - return (pTP != NULL); -} - -const CuePoint* Cues::GetFirst() const { - if (m_cue_points == NULL || m_count == 0) - return NULL; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL) - return NULL; - - CuePoint* const pCP = pp[0]; - if (pCP == NULL || pCP->GetTimeCode() < 0) - return NULL; - - return pCP; -} - -const CuePoint* Cues::GetLast() const { - if (m_cue_points == NULL || m_count <= 0) - return NULL; - - const long index = m_count - 1; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL) - return NULL; - - CuePoint* const pCP = pp[index]; - if (pCP == NULL || pCP->GetTimeCode() < 0) - return NULL; - - return pCP; -} - -const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { - if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL || - m_count < 1) { - return NULL; - } - - long index = pCurr->m_index; - if (index >= m_count) - return NULL; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL || pp[index] != pCurr) - return NULL; - - ++index; - - if (index >= m_count) - return NULL; - - CuePoint* const pNext = pp[index]; - - if (pNext == NULL || pNext->GetTimeCode() < 0) - return NULL; - - return pNext; -} - -const BlockEntry* Cues::GetBlock(const CuePoint* pCP, - const CuePoint::TrackPosition* pTP) const { - if (pCP == NULL || pTP == NULL) - return NULL; - - return m_pSegment->GetBlock(*pCP, *pTP); -} - -const BlockEntry* Segment::GetBlock(const CuePoint& cp, - const CuePoint::TrackPosition& tp) { - Cluster** const ii = m_clusters; - Cluster** i = ii; - - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** const jj = ii + count; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[ii, i) < pTP->m_pos - //[i, j) ? - //[j, jj) > pTP->m_pos - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pCluster = *k; - assert(pCluster); - - // const long long pos_ = pCluster->m_pos; - // assert(pos_); - // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); - - const long long pos = pCluster->GetPosition(); - assert(pos >= 0); - - if (pos < tp.m_pos) - i = k + 1; - else if (pos > tp.m_pos) - j = k; - else - return pCluster->GetEntry(cp, tp); - } - - assert(i == j); - // assert(Cluster::HasBlockEntries(this, tp.m_pos)); - - Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); - if (pCluster == NULL) - return NULL; - - const ptrdiff_t idx = i - m_clusters; - - if (!PreloadCluster(pCluster, idx)) { - delete pCluster; - return NULL; - } - assert(m_clusters); - assert(m_clusterPreloadCount > 0); - assert(m_clusters[idx] == pCluster); - - return pCluster->GetEntry(cp, tp); -} - -const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) { - if (requested_pos < 0) - return 0; - - Cluster** const ii = m_clusters; - Cluster** i = ii; - - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** const jj = ii + count; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[ii, i) < pTP->m_pos - //[i, j) ? - //[j, jj) > pTP->m_pos - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pCluster = *k; - assert(pCluster); - - // const long long pos_ = pCluster->m_pos; - // assert(pos_); - // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); - - const long long pos = pCluster->GetPosition(); - assert(pos >= 0); - - if (pos < requested_pos) - i = k + 1; - else if (pos > requested_pos) - j = k; - else - return pCluster; - } - - assert(i == j); - // assert(Cluster::HasBlockEntries(this, tp.m_pos)); - - Cluster* const pCluster = Cluster::Create(this, -1, requested_pos); - if (pCluster == NULL) - return NULL; - - const ptrdiff_t idx = i - m_clusters; - - if (!PreloadCluster(pCluster, idx)) { - delete pCluster; - return NULL; - } - assert(m_clusters); - assert(m_clusterPreloadCount > 0); - assert(m_clusters[idx] == pCluster); - - return pCluster; -} - -CuePoint::CuePoint(long idx, long long pos) - : m_element_start(0), - m_element_size(0), - m_index(idx), - m_timecode(-1 * pos), - m_track_positions(NULL), - m_track_positions_count(0) { - assert(pos > 0); -} - -CuePoint::~CuePoint() { delete[] m_track_positions; } - -bool CuePoint::Load(IMkvReader* pReader) { - // odbgstream os; - // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; - - if (m_timecode >= 0) // already loaded - return true; - - assert(m_track_positions == NULL); - assert(m_track_positions_count == 0); - - long long pos_ = -m_timecode; - const long long element_start = pos_; - - long long stop; - - { - long len; - - const long long id = ReadID(pReader, pos_, len); - if (id != libwebm::kMkvCuePoint) - return false; - - pos_ += len; // consume ID - - const long long size = ReadUInt(pReader, pos_, len); - assert(size >= 0); - - pos_ += len; // consume Size field - // pos_ now points to start of payload - - stop = pos_ + size; - } - - const long long element_size = stop - element_start; - - long long pos = pos_; - - // First count number of track positions - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if ((id < 0) || (pos + len > stop)) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if ((size < 0) || (pos + len > stop)) { - return false; - } - - pos += len; // consume Size field - if ((pos + size) > stop) { - return false; - } - - if (id == libwebm::kMkvCueTime) - m_timecode = UnserializeUInt(pReader, pos, size); - - else if (id == libwebm::kMkvCueTrackPositions) - ++m_track_positions_count; - - pos += size; // consume payload - } - - if (m_timecode < 0 || m_track_positions_count <= 0) { - return false; - } - - // os << "CuePoint::Load(cont'd): idpos=" << idpos - // << " timecode=" << m_timecode - // << endl; - - m_track_positions = new (std::nothrow) TrackPosition[m_track_positions_count]; - if (m_track_positions == NULL) - return false; - - // Now parse track positions - - TrackPosition* p = m_track_positions; - pos = pos_; - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) - return false; - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); - assert((pos + len) <= stop); - - pos += len; // consume Size field - assert((pos + size) <= stop); - - if (id == libwebm::kMkvCueTrackPositions) { - TrackPosition& tp = *p++; - if (!tp.Parse(pReader, pos, size)) { - return false; - } - } - - pos += size; // consume payload - if (pos > stop) - return false; - } - - assert(size_t(p - m_track_positions) == m_track_positions_count); - - m_element_start = element_start; - m_element_size = element_size; - - return true; -} - -bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, - long long size_) { - const long long stop = start_ + size_; - long long pos = start_; - - m_track = -1; - m_pos = -1; - m_block = 1; // default - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if ((id < 0) || ((pos + len) > stop)) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if ((size < 0) || ((pos + len) > stop)) { - return false; - } - - pos += len; // consume Size field - if ((pos + size) > stop) { - return false; - } - - if (id == libwebm::kMkvCueTrack) - m_track = UnserializeUInt(pReader, pos, size); - else if (id == libwebm::kMkvCueClusterPosition) - m_pos = UnserializeUInt(pReader, pos, size); - else if (id == libwebm::kMkvCueBlockNumber) - m_block = UnserializeUInt(pReader, pos, size); - - pos += size; // consume payload - } - - if ((m_pos < 0) || (m_track <= 0)) { - return false; - } - - return true; -} - -const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const { - if (pTrack == NULL) { - return NULL; - } - - const long long n = pTrack->GetNumber(); - - const TrackPosition* i = m_track_positions; - const TrackPosition* const j = i + m_track_positions_count; - - while (i != j) { - const TrackPosition& p = *i++; - - if (p.m_track == n) - return &p; - } - - return NULL; // no matching track number found -} - -long long CuePoint::GetTimeCode() const { return m_timecode; } - -long long CuePoint::GetTime(const Segment* pSegment) const { - assert(pSegment); - assert(m_timecode >= 0); - - const SegmentInfo* const pInfo = pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long time = scale * m_timecode; - - return time; -} - -bool Segment::DoneParsing() const { - if (m_size < 0) { - long long total, avail; - - const int status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return true; // must assume done - - if (total < 0) - return false; // assume live stream - - return (m_pos >= total); - } - - const long long stop = m_start + m_size; - - return (m_pos >= stop); -} - -const Cluster* Segment::GetFirst() const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - Cluster* const pCluster = m_clusters[0]; - assert(pCluster); - - return pCluster; -} - -const Cluster* Segment::GetLast() const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - const long idx = m_clusterCount - 1; - - Cluster* const pCluster = m_clusters[idx]; - assert(pCluster); - - return pCluster; -} - -unsigned long Segment::GetCount() const { return m_clusterCount; } - -const Cluster* Segment::GetNext(const Cluster* pCurr) { - assert(pCurr); - assert(pCurr != &m_eos); - assert(m_clusters); - - long idx = pCurr->m_index; - - if (idx >= 0) { - assert(m_clusterCount > 0); - assert(idx < m_clusterCount); - assert(pCurr == m_clusters[idx]); - - ++idx; - - if (idx >= m_clusterCount) - return &m_eos; // caller will LoadCluster as desired - - Cluster* const pNext = m_clusters[idx]; - assert(pNext); - assert(pNext->m_index >= 0); - assert(pNext->m_index == idx); - - return pNext; - } - - assert(m_clusterPreloadCount > 0); - - long long pos = pCurr->m_element_start; - - assert(m_size >= 0); // TODO - const long long stop = m_start + m_size; // end of segment - - { - long len; - - long long result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); - assert((pos + len) <= stop); // TODO - if (result != 0) - return NULL; - - const long long id = ReadID(m_pReader, pos, len); - if (id != libwebm::kMkvCluster) - return NULL; - - pos += len; // consume ID - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); // TODO - assert((pos + len) <= stop); // TODO - - const long long size = ReadUInt(m_pReader, pos, len); - assert(size > 0); // TODO - // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); - - pos += len; // consume length of size of element - assert((pos + size) <= stop); // TODO - - // Pos now points to start of payload - - pos += size; // consume payload - } - - long long off_next = 0; - - while (pos < stop) { - long len; - - long long result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); - assert((pos + len) <= stop); // TODO - if (result != 0) - return NULL; - - const long long idpos = pos; // pos of next (potential) cluster - - const long long id = ReadID(m_pReader, idpos, len); - if (id < 0) - return NULL; - - pos += len; // consume ID - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); // TODO - assert((pos + len) <= stop); // TODO - - const long long size = ReadUInt(m_pReader, pos, len); - assert(size >= 0); // TODO - - pos += len; // consume length of size of element - assert((pos + size) <= stop); // TODO - - // Pos now points to start of payload - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvCluster) { - const long long off_next_ = idpos - m_start; - - long long pos_; - long len_; - - const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_); - - assert(status >= 0); - - if (status > 0) { - off_next = off_next_; - break; - } - } - - pos += size; // consume payload - } - - if (off_next <= 0) - return 0; - - Cluster** const ii = m_clusters + m_clusterCount; - Cluster** i = ii; - - Cluster** const jj = ii + m_clusterPreloadCount; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[0, i) < pos_next - //[i, j) ? - //[j, jj) > pos_next - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pNext = *k; - assert(pNext); - assert(pNext->m_index < 0); - - // const long long pos_ = pNext->m_pos; - // assert(pos_); - // pos = pos_ * ((pos_ < 0) ? -1 : 1); - - pos = pNext->GetPosition(); - - if (pos < off_next) - i = k + 1; - else if (pos > off_next) - j = k; - else - return pNext; - } - - assert(i == j); - - Cluster* const pNext = Cluster::Create(this, -1, off_next); - if (pNext == NULL) - return NULL; - - const ptrdiff_t idx_next = i - m_clusters; // insertion position - - if (!PreloadCluster(pNext, idx_next)) { - delete pNext; - return NULL; - } - assert(m_clusters); - assert(idx_next < m_clusterSize); - assert(m_clusters[idx_next] == pNext); - - return pNext; -} - -long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, - long long& pos, long& len) { - assert(pCurr); - assert(!pCurr->EOS()); - assert(m_clusters); - - pResult = 0; - - if (pCurr->m_index >= 0) { // loaded (not merely preloaded) - assert(m_clusters[pCurr->m_index] == pCurr); - - const long next_idx = pCurr->m_index + 1; - - if (next_idx < m_clusterCount) { - pResult = m_clusters[next_idx]; - return 0; // success - } - - // curr cluster is last among loaded - - const long result = LoadCluster(pos, len); - - if (result < 0) // error or underflow - return result; - - if (result > 0) // no more clusters - { - // pResult = &m_eos; - return 1; - } - - pResult = GetLast(); - return 0; // success - } - - assert(m_pos > 0); - - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - // interrogate curr cluster - - pos = pCurr->m_element_start; - - if (pCurr->m_element_size >= 0) - pos += pCurr->m_element_size; - else { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadUInt(m_pReader, pos, len); - - if (id != libwebm::kMkvCluster) - return -1; - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume size field - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) // TODO: should never happen - return E_FILE_FORMAT_INVALID; // TODO: resolve this - - // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - // Pos now points to start of payload - - pos += size; // consume payload (that is, the current cluster) - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - // By consuming the payload, we are assuming that the curr - // cluster isn't interesting. That is, we don't bother checking - // whether the payload of the curr cluster is less than what - // happens to be available (obtained via IMkvReader::Length). - // Presumably the caller has already dispensed with the current - // cluster, and really does want the next cluster. - } - - // pos now points to just beyond the last fully-loaded cluster - - for (;;) { - const long status = DoParseNext(pResult, pos, len); - - if (status <= 1) - return status; - } -} - -long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - // Parse next cluster. This is strictly a parsing activity. - // Creation of a new cluster object happens later, after the - // parsing is done. - - long long off_next = 0; - long long cluster_size = -1; - - for (;;) { - if ((total >= 0) && (pos >= total)) - return 1; // EOF - - if ((segment_stop >= 0) && (pos >= segment_stop)) - return 1; // EOF - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; // absolute - const long long idoff = pos - m_start; // relative - - const long long id = ReadID(m_pReader, idpos, len); // absolute - - if (id < 0) // error - return static_cast(id); - - if (id == 0) // weird - return -1; // generic error - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume length of size of element - - // Pos now points to start of payload - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if ((segment_stop >= 0) && (size != unknown_size) && - ((pos + size) > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - if (id == libwebm::kMkvCues) { - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - const long long element_stop = pos + size; - - if ((segment_stop >= 0) && (element_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - const long long element_start = idpos; - const long long element_size = element_stop - element_start; - - if (m_pCues == NULL) { - m_pCues = new (std::nothrow) - Cues(this, pos, size, element_start, element_size); - if (m_pCues == NULL) - return false; - } - - pos += size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - if (id != libwebm::kMkvCluster) { // not a Cluster ID - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - // We have a cluster. - off_next = idoff; - - if (size != unknown_size) - cluster_size = size; - - break; - } - - assert(off_next > 0); // have cluster - - // We have parsed the next cluster. - // We have not created a cluster object yet. What we need - // to do now is determine whether it has already be preloaded - //(in which case, an object for this cluster has already been - // created), and if not, create a new cluster object. - - Cluster** const ii = m_clusters + m_clusterCount; - Cluster** i = ii; - - Cluster** const jj = ii + m_clusterPreloadCount; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[0, i) < pos_next - //[i, j) ? - //[j, jj) > pos_next - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - const Cluster* const pNext = *k; - assert(pNext); - assert(pNext->m_index < 0); - - pos = pNext->GetPosition(); - assert(pos >= 0); - - if (pos < off_next) - i = k + 1; - else if (pos > off_next) - j = k; - else { - pResult = pNext; - return 0; // success - } - } - - assert(i == j); - - long long pos_; - long len_; - - status = Cluster::HasBlockEntries(this, off_next, pos_, len_); - - if (status < 0) { // error or underflow - pos = pos_; - len = len_; - - return status; - } - - if (status > 0) { // means "found at least one block entry" - Cluster* const pNext = Cluster::Create(this, - -1, // preloaded - off_next); - if (pNext == NULL) - return -1; - - const ptrdiff_t idx_next = i - m_clusters; // insertion position - - if (!PreloadCluster(pNext, idx_next)) { - delete pNext; - return -1; - } - assert(m_clusters); - assert(idx_next < m_clusterSize); - assert(m_clusters[idx_next] == pNext); - - pResult = pNext; - return 0; // success - } - - // status == 0 means "no block entries found" - - if (cluster_size < 0) { // unknown size - const long long payload_pos = pos; // absolute pos of cluster payload - - for (;;) { // determine cluster size - if ((total >= 0) && (pos >= total)) - break; - - if ((segment_stop >= 0) && (pos >= segment_stop)) - break; // no more clusters - - // Read ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) // error (or underflow) - return static_cast(id); - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues) - break; - - pos += len; // consume ID (of sub-element) - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume size field of element - - // pos now points to start of sub-element's payload - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // not allowed for sub-elements - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) // weird - return E_FILE_FORMAT_INVALID; - - pos += size; // consume payload of sub-element - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - } // determine cluster size - - cluster_size = pos - payload_pos; - assert(cluster_size >= 0); // TODO: handle cluster_size = 0 - - pos = payload_pos; // reset and re-parse original cluster - } - - pos += cluster_size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 2; // try to find a cluster that follows next -} - -const Cluster* Segment::FindCluster(long long time_ns) const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - { - Cluster* const pCluster = m_clusters[0]; - assert(pCluster); - assert(pCluster->m_index == 0); - - if (time_ns <= pCluster->GetTime()) - return pCluster; - } - - // Binary search of cluster array - - long i = 0; - long j = m_clusterCount; - - while (i < j) { - // INVARIANT: - //[0, i) <= time_ns - //[i, j) ? - //[j, m_clusterCount) > time_ns - - const long k = i + (j - i) / 2; - assert(k < m_clusterCount); - - Cluster* const pCluster = m_clusters[k]; - assert(pCluster); - assert(pCluster->m_index == k); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - i = k + 1; - else - j = k; - - assert(i <= j); - } - - assert(i == j); - assert(i > 0); - assert(i <= m_clusterCount); - - const long k = i - 1; - - Cluster* const pCluster = m_clusters[k]; - assert(pCluster); - assert(pCluster->m_index == k); - assert(pCluster->GetTime() <= time_ns); - - return pCluster; -} - -const Tracks* Segment::GetTracks() const { return m_pTracks; } -const SegmentInfo* Segment::GetInfo() const { return m_pInfo; } -const Cues* Segment::GetCues() const { return m_pCues; } -const Chapters* Segment::GetChapters() const { return m_pChapters; } -const Tags* Segment::GetTags() const { return m_pTags; } -const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; } - -long long Segment::GetDuration() const { - assert(m_pInfo); - return m_pInfo->GetDuration(); -} - -Chapters::Chapters(Segment* pSegment, long long payload_start, - long long payload_size, long long element_start, - long long element_size) - : m_pSegment(pSegment), - m_start(payload_start), - m_size(payload_size), - m_element_start(element_start), - m_element_size(element_size), - m_editions(NULL), - m_editions_size(0), - m_editions_count(0) {} - -Chapters::~Chapters() { - while (m_editions_count > 0) { - Edition& e = m_editions[--m_editions_count]; - e.Clear(); - } - delete[] m_editions; -} - -long Chapters::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; // payload start - const long long stop = pos + m_size; // payload stop - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvEditionEntry) { - status = ParseEdition(pos, size); - - if (status < 0) // error - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -int Chapters::GetEditionCount() const { return m_editions_count; } - -const Chapters::Edition* Chapters::GetEdition(int idx) const { - if (idx < 0) - return NULL; - - if (idx >= m_editions_count) - return NULL; - - return m_editions + idx; -} - -bool Chapters::ExpandEditionsArray() { - if (m_editions_size > m_editions_count) - return true; // nothing else to do - - const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size; - - Edition* const editions = new (std::nothrow) Edition[size]; - - if (editions == NULL) - return false; - - for (int idx = 0; idx < m_editions_count; ++idx) { - m_editions[idx].ShallowCopy(editions[idx]); - } - - delete[] m_editions; - m_editions = editions; - - m_editions_size = size; - return true; -} - -long Chapters::ParseEdition(long long pos, long long size) { - if (!ExpandEditionsArray()) - return -1; - - Edition& e = m_editions[m_editions_count++]; - e.Init(); - - return e.Parse(m_pSegment->m_pReader, pos, size); -} - -Chapters::Edition::Edition() {} - -Chapters::Edition::~Edition() {} - -int Chapters::Edition::GetAtomCount() const { return m_atoms_count; } - -const Chapters::Atom* Chapters::Edition::GetAtom(int index) const { - if (index < 0) - return NULL; - - if (index >= m_atoms_count) - return NULL; - - return m_atoms + index; -} - -void Chapters::Edition::Init() { - m_atoms = NULL; - m_atoms_size = 0; - m_atoms_count = 0; -} - -void Chapters::Edition::ShallowCopy(Edition& rhs) const { - rhs.m_atoms = m_atoms; - rhs.m_atoms_size = m_atoms_size; - rhs.m_atoms_count = m_atoms_count; -} - -void Chapters::Edition::Clear() { - while (m_atoms_count > 0) { - Atom& a = m_atoms[--m_atoms_count]; - a.Clear(); - } - - delete[] m_atoms; - m_atoms = NULL; - - m_atoms_size = 0; -} - -long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) - continue; - - if (id == libwebm::kMkvChapterAtom) { - status = ParseAtom(pReader, pos, size); - - if (status < 0) // error - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandAtomsArray()) - return -1; - - Atom& a = m_atoms[m_atoms_count++]; - a.Init(); - - return a.Parse(pReader, pos, size); -} - -bool Chapters::Edition::ExpandAtomsArray() { - if (m_atoms_size > m_atoms_count) - return true; // nothing else to do - - const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size; - - Atom* const atoms = new (std::nothrow) Atom[size]; - - if (atoms == NULL) - return false; - - for (int idx = 0; idx < m_atoms_count; ++idx) { - m_atoms[idx].ShallowCopy(atoms[idx]); - } - - delete[] m_atoms; - m_atoms = atoms; - - m_atoms_size = size; - return true; -} - -Chapters::Atom::Atom() {} - -Chapters::Atom::~Atom() {} - -unsigned long long Chapters::Atom::GetUID() const { return m_uid; } - -const char* Chapters::Atom::GetStringUID() const { return m_string_uid; } - -long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; } - -long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; } - -long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const { - return GetTime(pChapters, m_start_timecode); -} - -long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const { - return GetTime(pChapters, m_stop_timecode); -} - -int Chapters::Atom::GetDisplayCount() const { return m_displays_count; } - -const Chapters::Display* Chapters::Atom::GetDisplay(int index) const { - if (index < 0) - return NULL; - - if (index >= m_displays_count) - return NULL; - - return m_displays + index; -} - -void Chapters::Atom::Init() { - m_string_uid = NULL; - m_uid = 0; - m_start_timecode = -1; - m_stop_timecode = -1; - - m_displays = NULL; - m_displays_size = 0; - m_displays_count = 0; -} - -void Chapters::Atom::ShallowCopy(Atom& rhs) const { - rhs.m_string_uid = m_string_uid; - rhs.m_uid = m_uid; - rhs.m_start_timecode = m_start_timecode; - rhs.m_stop_timecode = m_stop_timecode; - - rhs.m_displays = m_displays; - rhs.m_displays_size = m_displays_size; - rhs.m_displays_count = m_displays_count; -} - -void Chapters::Atom::Clear() { - delete[] m_string_uid; - m_string_uid = NULL; - - while (m_displays_count > 0) { - Display& d = m_displays[--m_displays_count]; - d.Clear(); - } - - delete[] m_displays; - m_displays = NULL; - - m_displays_size = 0; -} - -long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // 0 length payload, skip. - continue; - - if (id == libwebm::kMkvChapterDisplay) { - status = ParseDisplay(pReader, pos, size); - - if (status < 0) // error - return status; - } else if (id == libwebm::kMkvChapterStringUID) { - status = UnserializeString(pReader, pos, size, m_string_uid); - - if (status < 0) // error - return status; - } else if (id == libwebm::kMkvChapterUID) { - long long val; - status = UnserializeInt(pReader, pos, size, val); - - if (status < 0) // error - return status; - - m_uid = static_cast(val); - } else if (id == libwebm::kMkvChapterTimeStart) { - const long long val = UnserializeUInt(pReader, pos, size); - - if (val < 0) // error - return static_cast(val); - - m_start_timecode = val; - } else if (id == libwebm::kMkvChapterTimeEnd) { - const long long val = UnserializeUInt(pReader, pos, size); - - if (val < 0) // error - return static_cast(val); - - m_stop_timecode = val; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long long Chapters::Atom::GetTime(const Chapters* pChapters, - long long timecode) { - if (pChapters == NULL) - return -1; - - Segment* const pSegment = pChapters->m_pSegment; - - if (pSegment == NULL) // weird - return -1; - - const SegmentInfo* const pInfo = pSegment->GetInfo(); - - if (pInfo == NULL) - return -1; - - const long long timecode_scale = pInfo->GetTimeCodeScale(); - - if (timecode_scale < 1) // weird - return -1; - - if (timecode < 0) - return -1; - - const long long result = timecode_scale * timecode; - - return result; -} - -long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandDisplaysArray()) - return -1; - - Display& d = m_displays[m_displays_count++]; - d.Init(); - - return d.Parse(pReader, pos, size); -} - -bool Chapters::Atom::ExpandDisplaysArray() { - if (m_displays_size > m_displays_count) - return true; // nothing else to do - - const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; - - Display* const displays = new (std::nothrow) Display[size]; - - if (displays == NULL) - return false; - - for (int idx = 0; idx < m_displays_count; ++idx) { - m_displays[idx].ShallowCopy(displays[idx]); - } - - delete[] m_displays; - m_displays = displays; - - m_displays_size = size; - return true; -} - -Chapters::Display::Display() {} - -Chapters::Display::~Display() {} - -const char* Chapters::Display::GetString() const { return m_string; } - -const char* Chapters::Display::GetLanguage() const { return m_language; } - -const char* Chapters::Display::GetCountry() const { return m_country; } - -void Chapters::Display::Init() { - m_string = NULL; - m_language = NULL; - m_country = NULL; -} - -void Chapters::Display::ShallowCopy(Display& rhs) const { - rhs.m_string = m_string; - rhs.m_language = m_language; - rhs.m_country = m_country; -} - -void Chapters::Display::Clear() { - delete[] m_string; - m_string = NULL; - - delete[] m_language; - m_language = NULL; - - delete[] m_country; - m_country = NULL; -} - -long Chapters::Display::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // No payload. - continue; - - if (id == libwebm::kMkvChapString) { - status = UnserializeString(pReader, pos, size, m_string); - - if (status) - return status; - } else if (id == libwebm::kMkvChapLanguage) { - status = UnserializeString(pReader, pos, size, m_language); - - if (status) - return status; - } else if (id == libwebm::kMkvChapCountry) { - status = UnserializeString(pReader, pos, size, m_country); - - if (status) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(payload_start), - m_size(payload_size), - m_element_start(element_start), - m_element_size(element_size), - m_tags(NULL), - m_tags_size(0), - m_tags_count(0) {} - -Tags::~Tags() { - while (m_tags_count > 0) { - Tag& t = m_tags[--m_tags_count]; - t.Clear(); - } - delete[] m_tags; -} - -long Tags::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; // payload start - const long long stop = pos + m_size; // payload stop - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) - return status; - - if (size == 0) // 0 length tag, read another - continue; - - if (id == libwebm::kMkvTag) { - status = ParseTag(pos, size); - - if (status < 0) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -int Tags::GetTagCount() const { return m_tags_count; } - -const Tags::Tag* Tags::GetTag(int idx) const { - if (idx < 0) - return NULL; - - if (idx >= m_tags_count) - return NULL; - - return m_tags + idx; -} - -bool Tags::ExpandTagsArray() { - if (m_tags_size > m_tags_count) - return true; // nothing else to do - - const int size = (m_tags_size == 0) ? 1 : 2 * m_tags_size; - - Tag* const tags = new (std::nothrow) Tag[size]; - - if (tags == NULL) - return false; - - for (int idx = 0; idx < m_tags_count; ++idx) { - m_tags[idx].ShallowCopy(tags[idx]); - } - - delete[] m_tags; - m_tags = tags; - - m_tags_size = size; - return true; -} - -long Tags::ParseTag(long long pos, long long size) { - if (!ExpandTagsArray()) - return -1; - - Tag& t = m_tags[m_tags_count++]; - t.Init(); - - return t.Parse(m_pSegment->m_pReader, pos, size); -} - -Tags::Tag::Tag() {} - -Tags::Tag::~Tag() {} - -int Tags::Tag::GetSimpleTagCount() const { return m_simple_tags_count; } - -const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const { - if (index < 0) - return NULL; - - if (index >= m_simple_tags_count) - return NULL; - - return m_simple_tags + index; -} - -void Tags::Tag::Init() { - m_simple_tags = NULL; - m_simple_tags_size = 0; - m_simple_tags_count = 0; -} - -void Tags::Tag::ShallowCopy(Tag& rhs) const { - rhs.m_simple_tags = m_simple_tags; - rhs.m_simple_tags_size = m_simple_tags_size; - rhs.m_simple_tags_count = m_simple_tags_count; -} - -void Tags::Tag::Clear() { - while (m_simple_tags_count > 0) { - SimpleTag& d = m_simple_tags[--m_simple_tags_count]; - d.Clear(); - } - - delete[] m_simple_tags; - m_simple_tags = NULL; - - m_simple_tags_size = 0; -} - -long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) - return status; - - if (size == 0) // 0 length tag, read another - continue; - - if (id == libwebm::kMkvSimpleTag) { - status = ParseSimpleTag(pReader, pos, size); - - if (status < 0) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandSimpleTagsArray()) - return -1; - - SimpleTag& st = m_simple_tags[m_simple_tags_count++]; - st.Init(); - - return st.Parse(pReader, pos, size); -} - -bool Tags::Tag::ExpandSimpleTagsArray() { - if (m_simple_tags_size > m_simple_tags_count) - return true; // nothing else to do - - const int size = (m_simple_tags_size == 0) ? 1 : 2 * m_simple_tags_size; - - SimpleTag* const displays = new (std::nothrow) SimpleTag[size]; - - if (displays == NULL) - return false; - - for (int idx = 0; idx < m_simple_tags_count; ++idx) { - m_simple_tags[idx].ShallowCopy(displays[idx]); - } - - delete[] m_simple_tags; - m_simple_tags = displays; - - m_simple_tags_size = size; - return true; -} - -Tags::SimpleTag::SimpleTag() {} - -Tags::SimpleTag::~SimpleTag() {} - -const char* Tags::SimpleTag::GetTagName() const { return m_tag_name; } - -const char* Tags::SimpleTag::GetTagString() const { return m_tag_string; } - -void Tags::SimpleTag::Init() { - m_tag_name = NULL; - m_tag_string = NULL; -} - -void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const { - rhs.m_tag_name = m_tag_name; - rhs.m_tag_string = m_tag_string; -} - -void Tags::SimpleTag::Clear() { - delete[] m_tag_name; - m_tag_name = NULL; - - delete[] m_tag_string; - m_tag_string = NULL; -} - -long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvTagName) { - status = UnserializeString(pReader, pos, size, m_tag_name); - - if (status) - return status; - } else if (id == libwebm::kMkvTagString) { - status = UnserializeString(pReader, pos, size, m_tag_string); - - if (status) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_pMuxingAppAsUTF8(NULL), - m_pWritingAppAsUTF8(NULL), - m_pTitleAsUTF8(NULL) {} - -SegmentInfo::~SegmentInfo() { - delete[] m_pMuxingAppAsUTF8; - m_pMuxingAppAsUTF8 = NULL; - - delete[] m_pWritingAppAsUTF8; - m_pWritingAppAsUTF8 = NULL; - - delete[] m_pTitleAsUTF8; - m_pTitleAsUTF8 = NULL; -} - -long SegmentInfo::Parse() { - assert(m_pMuxingAppAsUTF8 == NULL); - assert(m_pWritingAppAsUTF8 == NULL); - assert(m_pTitleAsUTF8 == NULL); - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; - const long long stop = m_start + m_size; - - m_timecodeScale = 1000000; - m_duration = -1; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvTimecodeScale) { - m_timecodeScale = UnserializeUInt(pReader, pos, size); - - if (m_timecodeScale <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDuration) { - const long status = UnserializeFloat(pReader, pos, size, m_duration); - - if (status < 0) - return status; - - if (m_duration < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvMuxingApp) { - const long status = - UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvWritingApp) { - const long status = - UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvTitle) { - const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8); - - if (status) - return status; - } - - pos += size; - - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - const double rollover_check = m_duration * m_timecodeScale; - if (rollover_check > static_cast(LLONG_MAX)) - return E_FILE_FORMAT_INVALID; - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; } - -long long SegmentInfo::GetDuration() const { - if (m_duration < 0) - return -1; - - assert(m_timecodeScale >= 1); - - const double dd = double(m_duration) * double(m_timecodeScale); - const long long d = static_cast(dd); - - return d; -} - -const char* SegmentInfo::GetMuxingAppAsUTF8() const { - return m_pMuxingAppAsUTF8; -} - -const char* SegmentInfo::GetWritingAppAsUTF8() const { - return m_pWritingAppAsUTF8; -} - -const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; } - -/////////////////////////////////////////////////////////////// -// ContentEncoding element -ContentEncoding::ContentCompression::ContentCompression() - : algo(0), settings(NULL), settings_len(0) {} - -ContentEncoding::ContentCompression::~ContentCompression() { - delete[] settings; -} - -ContentEncoding::ContentEncryption::ContentEncryption() - : algo(0), - key_id(NULL), - key_id_len(0), - signature(NULL), - signature_len(0), - sig_key_id(NULL), - sig_key_id_len(0), - sig_algo(0), - sig_hash_algo(0) {} - -ContentEncoding::ContentEncryption::~ContentEncryption() { - delete[] key_id; - delete[] signature; - delete[] sig_key_id; -} - -ContentEncoding::ContentEncoding() - : compression_entries_(NULL), - compression_entries_end_(NULL), - encryption_entries_(NULL), - encryption_entries_end_(NULL), - encoding_order_(0), - encoding_scope_(1), - encoding_type_(0) {} - -ContentEncoding::~ContentEncoding() { - ContentCompression** comp_i = compression_entries_; - ContentCompression** const comp_j = compression_entries_end_; - - while (comp_i != comp_j) { - ContentCompression* const comp = *comp_i++; - delete comp; - } - - delete[] compression_entries_; - - ContentEncryption** enc_i = encryption_entries_; - ContentEncryption** const enc_j = encryption_entries_end_; - - while (enc_i != enc_j) { - ContentEncryption* const enc = *enc_i++; - delete enc; - } - - delete[] encryption_entries_; -} - -const ContentEncoding::ContentCompression* -ContentEncoding::GetCompressionByIndex(unsigned long idx) const { - const ptrdiff_t count = compression_entries_end_ - compression_entries_; - assert(count >= 0); - - if (idx >= static_cast(count)) - return NULL; - - return compression_entries_[idx]; -} - -unsigned long ContentEncoding::GetCompressionCount() const { - const ptrdiff_t count = compression_entries_end_ - compression_entries_; - assert(count >= 0); - - return static_cast(count); -} - -const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex( - unsigned long idx) const { - const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; - assert(count >= 0); - - if (idx >= static_cast(count)) - return NULL; - - return encryption_entries_[idx]; -} - -unsigned long ContentEncoding::GetEncryptionCount() const { - const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; - assert(count >= 0); - - return static_cast(count); -} - -long ContentEncoding::ParseContentEncAESSettingsEntry( - long long start, long long size, IMkvReader* pReader, - ContentEncAESSettings* aes) { - assert(pReader); - assert(aes); - - long long pos = start; - const long long stop = start + size; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvAESSettingsCipherMode) { - aes->cipher_mode = UnserializeUInt(pReader, pos, size); - if (aes->cipher_mode != 1) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; -} - -long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, - IMkvReader* pReader) { - assert(pReader); - - long long pos = start; - const long long stop = start + size; - - // Count ContentCompression and ContentEncryption elements. - int compression_count = 0; - int encryption_count = 0; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentCompression) - ++compression_count; - - if (id == libwebm::kMkvContentEncryption) - ++encryption_count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (compression_count <= 0 && encryption_count <= 0) - return -1; - - if (compression_count > 0) { - compression_entries_ = - new (std::nothrow) ContentCompression*[compression_count]; - if (!compression_entries_) - return -1; - compression_entries_end_ = compression_entries_; - } - - if (encryption_count > 0) { - encryption_entries_ = - new (std::nothrow) ContentEncryption*[encryption_count]; - if (!encryption_entries_) { - delete[] compression_entries_; - return -1; - } - encryption_entries_end_ = encryption_entries_; - } - - pos = start; - while (pos < stop) { - long long id, size; - long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentEncodingOrder) { - encoding_order_ = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentEncodingScope) { - encoding_scope_ = UnserializeUInt(pReader, pos, size); - if (encoding_scope_ < 1) - return -1; - } else if (id == libwebm::kMkvContentEncodingType) { - encoding_type_ = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentCompression) { - ContentCompression* const compression = - new (std::nothrow) ContentCompression(); - if (!compression) - return -1; - - status = ParseCompressionEntry(pos, size, pReader, compression); - if (status) { - delete compression; - return status; - } - *compression_entries_end_++ = compression; - } else if (id == libwebm::kMkvContentEncryption) { - ContentEncryption* const encryption = - new (std::nothrow) ContentEncryption(); - if (!encryption) - return -1; - - status = ParseEncryptionEntry(pos, size, pReader, encryption); - if (status) { - delete encryption; - return status; - } - *encryption_entries_end_++ = encryption; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long ContentEncoding::ParseCompressionEntry(long long start, long long size, - IMkvReader* pReader, - ContentCompression* compression) { - assert(pReader); - assert(compression); - - long long pos = start; - const long long stop = start + size; - - bool valid = false; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentCompAlgo) { - long long algo = UnserializeUInt(pReader, pos, size); - if (algo < 0) - return E_FILE_FORMAT_INVALID; - compression->algo = algo; - valid = true; - } else if (id == libwebm::kMkvContentCompSettings) { - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - compression->settings = buf; - compression->settings_len = buflen; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - // ContentCompAlgo is mandatory - if (!valid) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long ContentEncoding::ParseEncryptionEntry(long long start, long long size, - IMkvReader* pReader, - ContentEncryption* encryption) { - assert(pReader); - assert(encryption); - - long long pos = start; - const long long stop = start + size; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentEncAlgo) { - encryption->algo = UnserializeUInt(pReader, pos, size); - if (encryption->algo != 5) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvContentEncKeyID) { - delete[] encryption->key_id; - encryption->key_id = NULL; - encryption->key_id_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->key_id = buf; - encryption->key_id_len = buflen; - } else if (id == libwebm::kMkvContentSignature) { - delete[] encryption->signature; - encryption->signature = NULL; - encryption->signature_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->signature = buf; - encryption->signature_len = buflen; - } else if (id == libwebm::kMkvContentSigKeyID) { - delete[] encryption->sig_key_id; - encryption->sig_key_id = NULL; - encryption->sig_key_id_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->sig_key_id = buf; - encryption->sig_key_id_len = buflen; - } else if (id == libwebm::kMkvContentSigAlgo) { - encryption->sig_algo = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentSigHashAlgo) { - encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentEncAESSettings) { - const long status = ParseContentEncAESSettingsEntry( - pos, size, pReader, &encryption->aes_settings); - if (status) - return status; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; -} - -Track::Track(Segment* pSegment, long long element_start, long long element_size) - : m_pSegment(pSegment), - m_element_start(element_start), - m_element_size(element_size), - content_encoding_entries_(NULL), - content_encoding_entries_end_(NULL) {} - -Track::~Track() { - Info& info = const_cast(m_info); - info.Clear(); - - ContentEncoding** i = content_encoding_entries_; - ContentEncoding** const j = content_encoding_entries_end_; - - while (i != j) { - ContentEncoding* const encoding = *i++; - delete encoding; - } - - delete[] content_encoding_entries_; -} - -long Track::Create(Segment* pSegment, const Info& info, long long element_start, - long long element_size, Track*& pResult) { - if (pResult) - return -1; - - Track* const pTrack = - new (std::nothrow) Track(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { // error - delete pTrack; - return status; - } - - pResult = pTrack; - return 0; // success -} - -Track::Info::Info() - : uid(0), - defaultDuration(0), - codecDelay(0), - seekPreRoll(0), - nameAsUTF8(NULL), - language(NULL), - codecId(NULL), - codecNameAsUTF8(NULL), - codecPrivate(NULL), - codecPrivateSize(0), - lacing(false) {} - -Track::Info::~Info() { Clear(); } - -void Track::Info::Clear() { - delete[] nameAsUTF8; - nameAsUTF8 = NULL; - - delete[] language; - language = NULL; - - delete[] codecId; - codecId = NULL; - - delete[] codecPrivate; - codecPrivate = NULL; - codecPrivateSize = 0; - - delete[] codecNameAsUTF8; - codecNameAsUTF8 = NULL; -} - -int Track::Info::CopyStr(char* Info::*str, Info& dst_) const { - if (str == static_cast(NULL)) - return -1; - - char*& dst = dst_.*str; - - if (dst) // should be NULL already - return -1; - - const char* const src = this->*str; - - if (src == NULL) - return 0; - - const size_t len = strlen(src); - - dst = SafeArrayAlloc(1, len + 1); - - if (dst == NULL) - return -1; - - strcpy(dst, src); - - return 0; -} - -int Track::Info::Copy(Info& dst) const { - if (&dst == this) - return 0; - - dst.type = type; - dst.number = number; - dst.defaultDuration = defaultDuration; - dst.codecDelay = codecDelay; - dst.seekPreRoll = seekPreRoll; - dst.uid = uid; - dst.lacing = lacing; - dst.settings = settings; - - // We now copy the string member variables from src to dst. - // This involves memory allocation so in principle the operation - // can fail (indeed, that's why we have Info::Copy), so we must - // report this to the caller. An error return from this function - // therefore implies that the copy was only partially successful. - - if (int status = CopyStr(&Info::nameAsUTF8, dst)) - return status; - - if (int status = CopyStr(&Info::language, dst)) - return status; - - if (int status = CopyStr(&Info::codecId, dst)) - return status; - - if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) - return status; - - if (codecPrivateSize > 0) { - if (codecPrivate == NULL) - return -1; - - if (dst.codecPrivate) - return -1; - - if (dst.codecPrivateSize != 0) - return -1; - - dst.codecPrivate = SafeArrayAlloc(1, codecPrivateSize); - - if (dst.codecPrivate == NULL) - return -1; - - memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); - dst.codecPrivateSize = codecPrivateSize; - } - - return 0; -} - -const BlockEntry* Track::GetEOS() const { return &m_eos; } - -long Track::GetType() const { return m_info.type; } - -long Track::GetNumber() const { return m_info.number; } - -unsigned long long Track::GetUid() const { return m_info.uid; } - -const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; } - -const char* Track::GetLanguage() const { return m_info.language; } - -const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; } - -const char* Track::GetCodecId() const { return m_info.codecId; } - -const unsigned char* Track::GetCodecPrivate(size_t& size) const { - size = m_info.codecPrivateSize; - return m_info.codecPrivate; -} - -bool Track::GetLacing() const { return m_info.lacing; } - -unsigned long long Track::GetDefaultDuration() const { - return m_info.defaultDuration; -} - -unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; } - -unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; } - -long Track::GetFirst(const BlockEntry*& pBlockEntry) const { - const Cluster* pCluster = m_pSegment->GetFirst(); - - for (int i = 0;;) { - if (pCluster == NULL) { - pBlockEntry = GetEOS(); - return 1; - } - - if (pCluster->EOS()) { - if (m_pSegment->DoneParsing()) { - pBlockEntry = GetEOS(); - return 1; - } - - pBlockEntry = 0; - return E_BUFFER_NOT_FULL; - } - - long status = pCluster->GetFirst(pBlockEntry); - - if (status < 0) // error - return status; - - if (pBlockEntry == 0) { // empty cluster - pCluster = m_pSegment->GetNext(pCluster); - continue; - } - - for (;;) { - const Block* const pBlock = pBlockEntry->GetBlock(); - assert(pBlock); - - const long long tn = pBlock->GetTrackNumber(); - - if ((tn == m_info.number) && VetEntry(pBlockEntry)) - return 0; - - const BlockEntry* pNextEntry; - - status = pCluster->GetNext(pBlockEntry, pNextEntry); - - if (status < 0) // error - return status; - - if (pNextEntry == 0) - break; - - pBlockEntry = pNextEntry; - } - - ++i; - - if (i >= 100) - break; - - pCluster = m_pSegment->GetNext(pCluster); - } - - // NOTE: if we get here, it means that we didn't find a block with - // a matching track number. We interpret that as an error (which - // might be too conservative). - - pBlockEntry = GetEOS(); // so we can return a non-NULL value - return 1; -} - -long Track::GetNext(const BlockEntry* pCurrEntry, - const BlockEntry*& pNextEntry) const { - assert(pCurrEntry); - assert(!pCurrEntry->EOS()); //? - - const Block* const pCurrBlock = pCurrEntry->GetBlock(); - assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number); - if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number) - return -1; - - const Cluster* pCluster = pCurrEntry->GetCluster(); - assert(pCluster); - assert(!pCluster->EOS()); - - long status = pCluster->GetNext(pCurrEntry, pNextEntry); - - if (status < 0) // error - return status; - - for (int i = 0;;) { - while (pNextEntry) { - const Block* const pNextBlock = pNextEntry->GetBlock(); - assert(pNextBlock); - - if (pNextBlock->GetTrackNumber() == m_info.number) - return 0; - - pCurrEntry = pNextEntry; - - status = pCluster->GetNext(pCurrEntry, pNextEntry); - - if (status < 0) // error - return status; - } - - pCluster = m_pSegment->GetNext(pCluster); - - if (pCluster == NULL) { - pNextEntry = GetEOS(); - return 1; - } - - if (pCluster->EOS()) { - if (m_pSegment->DoneParsing()) { - pNextEntry = GetEOS(); - return 1; - } - - // TODO: there is a potential O(n^2) problem here: we tell the - // caller to (pre)load another cluster, which he does, but then he - // calls GetNext again, which repeats the same search. This is - // a pathological case, since the only way it can happen is if - // there exists a long sequence of clusters none of which contain a - // block from this track. One way around this problem is for the - // caller to be smarter when he loads another cluster: don't call - // us back until you have a cluster that contains a block from this - // track. (Of course, that's not cheap either, since our caller - // would have to scan the each cluster as it's loaded, so that - // would just push back the problem.) - - pNextEntry = NULL; - return E_BUFFER_NOT_FULL; - } - - status = pCluster->GetFirst(pNextEntry); - - if (status < 0) // error - return status; - - if (pNextEntry == NULL) // empty cluster - continue; - - ++i; - - if (i >= 100) - break; - } - - // NOTE: if we get here, it means that we didn't find a block with - // a matching track number after lots of searching, so we give - // up trying. - - pNextEntry = GetEOS(); // so we can return a non-NULL value - return 1; -} - -bool Track::VetEntry(const BlockEntry* pBlockEntry) const { - assert(pBlockEntry); - const Block* const pBlock = pBlockEntry->GetBlock(); - assert(pBlock); - assert(pBlock->GetTrackNumber() == m_info.number); - if (!pBlock || pBlock->GetTrackNumber() != m_info.number) - return false; - - // This function is used during a seek to determine whether the - // frame is a valid seek target. This default function simply - // returns true, which means all frames are valid seek targets. - // It gets overridden by the VideoTrack class, because only video - // keyframes can be used as seek target. - - return true; -} - -long Track::Seek(long long time_ns, const BlockEntry*& pResult) const { - const long status = GetFirst(pResult); - - if (status < 0) // buffer underflow, etc - return status; - - assert(pResult); - - if (pResult->EOS()) - return 0; - - const Cluster* pCluster = pResult->GetCluster(); - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - - if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) - return 0; - - Cluster** const clusters = m_pSegment->m_clusters; - assert(clusters); - - const long count = m_pSegment->GetCount(); // loaded only, not preloaded - assert(count > 0); - - Cluster** const i = clusters + pCluster->GetIndex(); - assert(i); - assert(*i == pCluster); - assert(pCluster->GetTime() <= time_ns); - - Cluster** const j = clusters + count; - - Cluster** lo = i; - Cluster** hi = j; - - while (lo < hi) { - // INVARIANT: - //[i, lo) <= time_ns - //[lo, hi) ? - //[hi, j) > time_ns - - Cluster** const mid = lo + (hi - lo) / 2; - assert(mid < hi); - - pCluster = *mid; - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - lo = mid + 1; - else - hi = mid; - - assert(lo <= hi); - } - - assert(lo == hi); - assert(lo > i); - assert(lo <= j); - - while (lo > i) { - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this); - - if ((pResult != 0) && !pResult->EOS()) - return 0; - - // landed on empty cluster (no entries) - } - - pResult = GetEOS(); // weird - return 0; -} - -const ContentEncoding* Track::GetContentEncodingByIndex( - unsigned long idx) const { - const ptrdiff_t count = - content_encoding_entries_end_ - content_encoding_entries_; - assert(count >= 0); - - if (idx >= static_cast(count)) - return NULL; - - return content_encoding_entries_[idx]; -} - -unsigned long Track::GetContentEncodingCount() const { - const ptrdiff_t count = - content_encoding_entries_end_ - content_encoding_entries_; - assert(count >= 0); - - return static_cast(count); -} - -long Track::ParseContentEncodingsEntry(long long start, long long size) { - IMkvReader* const pReader = m_pSegment->m_pReader; - assert(pReader); - - long long pos = start; - const long long stop = start + size; - - // Count ContentEncoding elements. - int count = 0; - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - // pos now designates start of element - if (id == libwebm::kMkvContentEncoding) - ++count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (count <= 0) - return -1; - - content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count]; - if (!content_encoding_entries_) - return -1; - - content_encoding_entries_end_ = content_encoding_entries_; - - pos = start; - while (pos < stop) { - long long id, size; - long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - // pos now designates start of element - if (id == libwebm::kMkvContentEncoding) { - ContentEncoding* const content_encoding = - new (std::nothrow) ContentEncoding(); - if (!content_encoding) - return -1; - - status = content_encoding->ParseContentEncodingEntry(pos, size, pReader); - if (status) { - delete content_encoding; - return status; - } - - *content_encoding_entries_end_++ = content_encoding; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {} - -BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; } - -const Block* Track::EOSBlock::GetBlock() const { return NULL; } - -bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos, - long long value_size, bool is_x, - PrimaryChromaticity** chromaticity) { - if (!reader) - return false; - - if (!*chromaticity) - *chromaticity = new PrimaryChromaticity(); - - if (!*chromaticity) - return false; - - PrimaryChromaticity* pc = *chromaticity; - float* value = is_x ? &pc->x : &pc->y; - - double parser_value = 0; - const long long parse_status = - UnserializeFloat(reader, read_pos, value_size, parser_value); - - // Valid range is [0, 1]. Make sure the double is representable as a float - // before casting. - if (parse_status < 0 || parser_value < 0.0 || parser_value > 1.0 || - (parser_value > 0.0 && parser_value < FLT_MIN)) - return false; - - *value = static_cast(parser_value); - - return true; -} - -bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, - long long mm_size, MasteringMetadata** mm) { - if (!reader || *mm) - return false; - - std::unique_ptr mm_ptr(new MasteringMetadata()); - if (!mm_ptr.get()) - return false; - - const long long mm_end = mm_start + mm_size; - long long read_pos = mm_start; - - while (read_pos < mm_end) { - long long child_id = 0; - long long child_size = 0; - - const long long status = - ParseElementHeader(reader, read_pos, mm_end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvLuminanceMax) { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - if (value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - mm_ptr->luminance_max = static_cast(value); - if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 || - mm_ptr->luminance_max > 9999.99) { - return false; - } - } else if (child_id == libwebm::kMkvLuminanceMin) { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - if (value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - mm_ptr->luminance_min = static_cast(value); - if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 || - mm_ptr->luminance_min > 999.9999) { - return false; - } - } else { - bool is_x = false; - PrimaryChromaticity** chromaticity; - switch (child_id) { - case libwebm::kMkvPrimaryRChromaticityX: - case libwebm::kMkvPrimaryRChromaticityY: - is_x = child_id == libwebm::kMkvPrimaryRChromaticityX; - chromaticity = &mm_ptr->r; - break; - case libwebm::kMkvPrimaryGChromaticityX: - case libwebm::kMkvPrimaryGChromaticityY: - is_x = child_id == libwebm::kMkvPrimaryGChromaticityX; - chromaticity = &mm_ptr->g; - break; - case libwebm::kMkvPrimaryBChromaticityX: - case libwebm::kMkvPrimaryBChromaticityY: - is_x = child_id == libwebm::kMkvPrimaryBChromaticityX; - chromaticity = &mm_ptr->b; - break; - case libwebm::kMkvWhitePointChromaticityX: - case libwebm::kMkvWhitePointChromaticityY: - is_x = child_id == libwebm::kMkvWhitePointChromaticityX; - chromaticity = &mm_ptr->white_point; - break; - default: - return false; - } - const bool value_parse_status = PrimaryChromaticity::Parse( - reader, read_pos, child_size, is_x, chromaticity); - if (!value_parse_status) - return false; - } - - read_pos += child_size; - if (read_pos > mm_end) - return false; - } - - *mm = mm_ptr.release(); - return true; -} - -bool Colour::Parse(IMkvReader* reader, long long colour_start, - long long colour_size, Colour** colour) { - if (!reader || *colour) - return false; - - std::unique_ptr colour_ptr(new Colour()); - if (!colour_ptr.get()) - return false; - - const long long colour_end = colour_start + colour_size; - long long read_pos = colour_start; - - while (read_pos < colour_end) { - long long child_id = 0; - long long child_size = 0; - - const long status = - ParseElementHeader(reader, read_pos, colour_end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvMatrixCoefficients) { - colour_ptr->matrix_coefficients = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->matrix_coefficients < 0) - return false; - } else if (child_id == libwebm::kMkvBitsPerChannel) { - colour_ptr->bits_per_channel = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->bits_per_channel < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) { - colour_ptr->chroma_subsampling_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_subsampling_horz < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSubsamplingVert) { - colour_ptr->chroma_subsampling_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_subsampling_vert < 0) - return false; - } else if (child_id == libwebm::kMkvCbSubsamplingHorz) { - colour_ptr->cb_subsampling_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->cb_subsampling_horz < 0) - return false; - } else if (child_id == libwebm::kMkvCbSubsamplingVert) { - colour_ptr->cb_subsampling_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->cb_subsampling_vert < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSitingHorz) { - colour_ptr->chroma_siting_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_siting_horz < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSitingVert) { - colour_ptr->chroma_siting_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_siting_vert < 0) - return false; - } else if (child_id == libwebm::kMkvRange) { - colour_ptr->range = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->range < 0) - return false; - } else if (child_id == libwebm::kMkvTransferCharacteristics) { - colour_ptr->transfer_characteristics = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->transfer_characteristics < 0) - return false; - } else if (child_id == libwebm::kMkvPrimaries) { - colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->primaries < 0) - return false; - } else if (child_id == libwebm::kMkvMaxCLL) { - colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->max_cll < 0) - return false; - } else if (child_id == libwebm::kMkvMaxFALL) { - colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->max_fall < 0) - return false; - } else if (child_id == libwebm::kMkvMasteringMetadata) { - if (!MasteringMetadata::Parse(reader, read_pos, child_size, - &colour_ptr->mastering_metadata)) - return false; - } else { - return false; - } - - read_pos += child_size; - if (read_pos > colour_end) - return false; - } - *colour = colour_ptr.release(); - return true; -} - -bool Projection::Parse(IMkvReader* reader, long long start, long long size, - Projection** projection) { - if (!reader || *projection) - return false; - - std::unique_ptr projection_ptr(new Projection()); - if (!projection_ptr.get()) - return false; - - const long long end = start + size; - long long read_pos = start; - - while (read_pos < end) { - long long child_id = 0; - long long child_size = 0; - - const long long status = - ParseElementHeader(reader, read_pos, end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvProjectionType) { - long long projection_type = kTypeNotPresent; - projection_type = UnserializeUInt(reader, read_pos, child_size); - if (projection_type < 0) - return false; - - projection_ptr->type = static_cast(projection_type); - } else if (child_id == libwebm::kMkvProjectionPrivate) { - unsigned char* data = SafeArrayAlloc(1, child_size); - - if (data == NULL) - return false; - - const int status = - reader->Read(read_pos, static_cast(child_size), data); - - if (status) { - delete[] data; - return false; - } - - projection_ptr->private_data = data; - projection_ptr->private_data_length = static_cast(child_size); - } else { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - // Make sure value is representable as a float before casting. - if (value_parse_status < 0 || value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - - switch (child_id) { - case libwebm::kMkvProjectionPoseYaw: - projection_ptr->pose_yaw = static_cast(value); - break; - case libwebm::kMkvProjectionPosePitch: - projection_ptr->pose_pitch = static_cast(value); - break; - case libwebm::kMkvProjectionPoseRoll: - projection_ptr->pose_roll = static_cast(value); - break; - default: - return false; - } - } - - read_pos += child_size; - if (read_pos > end) - return false; - } - - *projection = projection_ptr.release(); - return true; -} - -VideoTrack::VideoTrack(Segment* pSegment, long long element_start, - long long element_size) - : Track(pSegment, element_start, element_size), - m_colour(NULL), - m_projection(NULL) {} - -VideoTrack::~VideoTrack() { - delete m_colour; - delete m_projection; -} - -long VideoTrack::Parse(Segment* pSegment, const Info& info, - long long element_start, long long element_size, - VideoTrack*& pResult) { - if (pResult) - return -1; - - if (info.type != Track::kVideo) - return -1; - - long long width = 0; - long long height = 0; - long long display_width = 0; - long long display_height = 0; - long long display_unit = 0; - long long stereo_mode = 0; - - double rate = 0.0; - - IMkvReader* const pReader = pSegment->m_pReader; - - const Settings& s = info.settings; - assert(s.start >= 0); - assert(s.size >= 0); - - long long pos = s.start; - assert(pos >= 0); - - const long long stop = pos + s.size; - - Colour* colour = NULL; - Projection* projection = NULL; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvPixelWidth) { - width = UnserializeUInt(pReader, pos, size); - - if (width <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvPixelHeight) { - height = UnserializeUInt(pReader, pos, size); - - if (height <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayWidth) { - display_width = UnserializeUInt(pReader, pos, size); - - if (display_width <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayHeight) { - display_height = UnserializeUInt(pReader, pos, size); - - if (display_height <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayUnit) { - display_unit = UnserializeUInt(pReader, pos, size); - - if (display_unit < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvStereoMode) { - stereo_mode = UnserializeUInt(pReader, pos, size); - - if (stereo_mode < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvFrameRate) { - const long status = UnserializeFloat(pReader, pos, size, rate); - - if (status < 0) - return status; - - if (rate <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvColour) { - if (!Colour::Parse(pReader, pos, size, &colour)) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvProjection) { - if (!Projection::Parse(pReader, pos, size, &projection)) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - VideoTrack* const pTrack = - new (std::nothrow) VideoTrack(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { // error - delete pTrack; - return status; - } - - pTrack->m_width = width; - pTrack->m_height = height; - pTrack->m_display_width = display_width; - pTrack->m_display_height = display_height; - pTrack->m_display_unit = display_unit; - pTrack->m_stereo_mode = stereo_mode; - pTrack->m_rate = rate; - pTrack->m_colour = colour; - pTrack->m_projection = projection; - - pResult = pTrack; - return 0; // success -} - -bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const { - return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); -} - -long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const { - const long status = GetFirst(pResult); - - if (status < 0) // buffer underflow, etc - return status; - - assert(pResult); - - if (pResult->EOS()) - return 0; - - const Cluster* pCluster = pResult->GetCluster(); - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - - if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) - return 0; - - Cluster** const clusters = m_pSegment->m_clusters; - assert(clusters); - - const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded - assert(count > 0); - - Cluster** const i = clusters + pCluster->GetIndex(); - assert(i); - assert(*i == pCluster); - assert(pCluster->GetTime() <= time_ns); - - Cluster** const j = clusters + count; - - Cluster** lo = i; - Cluster** hi = j; - - while (lo < hi) { - // INVARIANT: - //[i, lo) <= time_ns - //[lo, hi) ? - //[hi, j) > time_ns - - Cluster** const mid = lo + (hi - lo) / 2; - assert(mid < hi); - - pCluster = *mid; - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - lo = mid + 1; - else - hi = mid; - - assert(lo <= hi); - } - - assert(lo == hi); - assert(lo > i); - assert(lo <= j); - - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this, time_ns); - - if ((pResult != 0) && !pResult->EOS()) // found a keyframe - return 0; - - while (lo != i) { - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this, time_ns); - - if ((pResult != 0) && !pResult->EOS()) - return 0; - } - - // weird: we're on the first cluster, but no keyframe found - // should never happen but we must return something anyway - - pResult = GetEOS(); - return 0; -} - -Colour* VideoTrack::GetColour() const { return m_colour; } - -Projection* VideoTrack::GetProjection() const { return m_projection; } - -long long VideoTrack::GetWidth() const { return m_width; } - -long long VideoTrack::GetHeight() const { return m_height; } - -long long VideoTrack::GetDisplayWidth() const { - return m_display_width > 0 ? m_display_width : GetWidth(); -} - -long long VideoTrack::GetDisplayHeight() const { - return m_display_height > 0 ? m_display_height : GetHeight(); -} - -long long VideoTrack::GetDisplayUnit() const { return m_display_unit; } - -long long VideoTrack::GetStereoMode() const { return m_stereo_mode; } - -double VideoTrack::GetFrameRate() const { return m_rate; } - -AudioTrack::AudioTrack(Segment* pSegment, long long element_start, - long long element_size) - : Track(pSegment, element_start, element_size) {} - -long AudioTrack::Parse(Segment* pSegment, const Info& info, - long long element_start, long long element_size, - AudioTrack*& pResult) { - if (pResult) - return -1; - - if (info.type != Track::kAudio) - return -1; - - IMkvReader* const pReader = pSegment->m_pReader; - - const Settings& s = info.settings; - assert(s.start >= 0); - assert(s.size >= 0); - - long long pos = s.start; - assert(pos >= 0); - - const long long stop = pos + s.size; - - double rate = 8000.0; // MKV default - long long channels = 1; - long long bit_depth = 0; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSamplingFrequency) { - status = UnserializeFloat(pReader, pos, size, rate); - - if (status < 0) - return status; - - if (rate <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvChannels) { - channels = UnserializeUInt(pReader, pos, size); - - if (channels <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvBitDepth) { - bit_depth = UnserializeUInt(pReader, pos, size); - - if (bit_depth <= 0) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - AudioTrack* const pTrack = - new (std::nothrow) AudioTrack(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { - delete pTrack; - return status; - } - - pTrack->m_rate = rate; - pTrack->m_channels = channels; - pTrack->m_bitDepth = bit_depth; - - pResult = pTrack; - return 0; // success -} - -double AudioTrack::GetSamplingRate() const { return m_rate; } - -long long AudioTrack::GetChannels() const { return m_channels; } - -long long AudioTrack::GetBitDepth() const { return m_bitDepth; } - -Tracks::Tracks(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_trackEntries(NULL), - m_trackEntriesEnd(NULL) {} - -long Tracks::Parse() { - assert(m_trackEntries == NULL); - assert(m_trackEntriesEnd == NULL); - - const long long stop = m_start + m_size; - IMkvReader* const pReader = m_pSegment->m_pReader; - - int count = 0; - long long pos = m_start; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvTrackEntry) - ++count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - if (count <= 0) - return 0; // success - - m_trackEntries = new (std::nothrow) Track*[count]; - - if (m_trackEntries == NULL) - return -1; - - m_trackEntriesEnd = m_trackEntries; - - pos = m_start; - - while (pos < stop) { - const long long element_start = pos; - - long long id, payload_size; - - const long status = - ParseElementHeader(pReader, pos, stop, id, payload_size); - - if (status < 0) // error - return status; - - if (payload_size == 0) // weird - continue; - - const long long payload_stop = pos + payload_size; - assert(payload_stop <= stop); // checked in ParseElement - - const long long element_size = payload_stop - element_start; - - if (id == libwebm::kMkvTrackEntry) { - Track*& pTrack = *m_trackEntriesEnd; - pTrack = NULL; - - const long status = ParseTrackEntry(pos, payload_size, element_start, - element_size, pTrack); - if (status) - return status; - - if (pTrack) - ++m_trackEntriesEnd; - } - - pos = payload_stop; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -unsigned long Tracks::GetTracksCount() const { - const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; - assert(result >= 0); - - return static_cast(result); -} - -long Tracks::ParseTrackEntry(long long track_start, long long track_size, - long long element_start, long long element_size, - Track*& pResult) const { - if (pResult) - return -1; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = track_start; - const long long track_stop = track_start + track_size; - - Track::Info info; - - info.type = 0; - info.number = 0; - info.uid = 0; - info.defaultDuration = 0; - - Track::Settings v; - v.start = -1; - v.size = -1; - - Track::Settings a; - a.start = -1; - a.size = -1; - - Track::Settings e; // content_encodings_settings; - e.start = -1; - e.size = -1; - - long long lacing = 1; // default is true - - while (pos < track_stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, track_stop, id, size); - - if (status < 0) // error - return status; - - if (size < 0) - return E_FILE_FORMAT_INVALID; - - const long long start = pos; - - if (id == libwebm::kMkvVideo) { - v.start = start; - v.size = size; - } else if (id == libwebm::kMkvAudio) { - a.start = start; - a.size = size; - } else if (id == libwebm::kMkvContentEncodings) { - e.start = start; - e.size = size; - } else if (id == libwebm::kMkvTrackUID) { - if (size > 8) - return E_FILE_FORMAT_INVALID; - - info.uid = 0; - - long long pos_ = start; - const long long pos_end = start + size; - - while (pos_ != pos_end) { - unsigned char b; - - const int status = pReader->Read(pos_, 1, &b); - - if (status) - return status; - - info.uid <<= 8; - info.uid |= b; - - ++pos_; - } - } else if (id == libwebm::kMkvTrackNumber) { - const long long num = UnserializeUInt(pReader, pos, size); - - if ((num <= 0) || (num > 127)) - return E_FILE_FORMAT_INVALID; - - info.number = static_cast(num); - } else if (id == libwebm::kMkvTrackType) { - const long long type = UnserializeUInt(pReader, pos, size); - - if ((type <= 0) || (type > 254)) - return E_FILE_FORMAT_INVALID; - - info.type = static_cast(type); - } else if (id == libwebm::kMkvName) { - const long status = - UnserializeString(pReader, pos, size, info.nameAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvLanguage) { - const long status = UnserializeString(pReader, pos, size, info.language); - - if (status) - return status; - } else if (id == libwebm::kMkvDefaultDuration) { - const long long duration = UnserializeUInt(pReader, pos, size); - - if (duration < 0) - return E_FILE_FORMAT_INVALID; - - info.defaultDuration = static_cast(duration); - } else if (id == libwebm::kMkvCodecID) { - const long status = UnserializeString(pReader, pos, size, info.codecId); - - if (status) - return status; - } else if (id == libwebm::kMkvFlagLacing) { - lacing = UnserializeUInt(pReader, pos, size); - - if ((lacing < 0) || (lacing > 1)) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvCodecPrivate) { - delete[] info.codecPrivate; - info.codecPrivate = NULL; - info.codecPrivateSize = 0; - - const size_t buflen = static_cast(size); - - if (buflen) { - unsigned char* buf = SafeArrayAlloc(1, buflen); - - if (buf == NULL) - return -1; - - const int status = pReader->Read(pos, static_cast(buflen), buf); - - if (status) { - delete[] buf; - return status; - } - - info.codecPrivate = buf; - info.codecPrivateSize = buflen; - } - } else if (id == libwebm::kMkvCodecName) { - const long status = - UnserializeString(pReader, pos, size, info.codecNameAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvCodecDelay) { - info.codecDelay = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvSeekPreRoll) { - info.seekPreRoll = UnserializeUInt(pReader, pos, size); - } - - pos += size; // consume payload - if (pos > track_stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != track_stop) - return E_FILE_FORMAT_INVALID; - - if (info.number <= 0) // not specified - return E_FILE_FORMAT_INVALID; - - if (GetTrackByNumber(info.number)) - return E_FILE_FORMAT_INVALID; - - if (info.type <= 0) // not specified - return E_FILE_FORMAT_INVALID; - - info.lacing = (lacing > 0) ? true : false; - - if (info.type == Track::kVideo) { - if (v.start < 0) - return E_FILE_FORMAT_INVALID; - - if (a.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings = v; - - VideoTrack* pTrack = NULL; - - const long status = VideoTrack::Parse(m_pSegment, info, element_start, - element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - - if (e.start >= 0) - pResult->ParseContentEncodingsEntry(e.start, e.size); - } else if (info.type == Track::kAudio) { - if (a.start < 0) - return E_FILE_FORMAT_INVALID; - - if (v.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings = a; - - AudioTrack* pTrack = NULL; - - const long status = AudioTrack::Parse(m_pSegment, info, element_start, - element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - - if (e.start >= 0) - pResult->ParseContentEncodingsEntry(e.start, e.size); - } else { - // neither video nor audio - probably metadata or subtitles - - if (a.start >= 0) - return E_FILE_FORMAT_INVALID; - - if (v.start >= 0) - return E_FILE_FORMAT_INVALID; - - if (info.type == Track::kMetadata && e.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings.start = -1; - info.settings.size = 0; - - Track* pTrack = NULL; - - const long status = - Track::Create(m_pSegment, info, element_start, element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - } - - return 0; // success -} - -Tracks::~Tracks() { - Track** i = m_trackEntries; - Track** const j = m_trackEntriesEnd; - - while (i != j) { - Track* const pTrack = *i++; - delete pTrack; - } - - delete[] m_trackEntries; -} - -const Track* Tracks::GetTrackByNumber(long tn) const { - if (tn < 0) - return NULL; - - Track** i = m_trackEntries; - Track** const j = m_trackEntriesEnd; - - while (i != j) { - Track* const pTrack = *i++; - - if (pTrack == NULL) - continue; - - if (tn == pTrack->GetNumber()) - return pTrack; - } - - return NULL; // not found -} - -const Track* Tracks::GetTrackByIndex(unsigned long idx) const { - const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; - - if (idx >= static_cast(count)) - return NULL; - - return m_trackEntries[idx]; -} - -long Cluster::Load(long long& pos, long& len) const { - if (m_pSegment == NULL) - return E_PARSE_FAILED; - - if (m_timecode >= 0) // at least partially loaded - return 0; - - if (m_pos != m_element_start || m_element_size >= 0) - return E_PARSE_FAILED; - - IMkvReader* const pReader = m_pSegment->m_pReader; - long long total, avail; - const int status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && (avail > total || m_pos > total)) - return E_FILE_FORMAT_INVALID; - - pos = m_pos; - - long long cluster_size = -1; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error or underflow - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id_ = ReadID(pReader, pos, len); - - if (id_ < 0) // error - return static_cast(id_); - - if (id_ != libwebm::kMkvCluster) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume id - - // read cluster size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(cluster_size); - - if (size == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size of element - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) - cluster_size = size; - - // pos points to start of payload - long long timecode = -1; - long long new_pos = -1; - bool bBlock = false; - - long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size; - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - break; - - // Parse ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - if (id == 0) - return E_FILE_FORMAT_INVALID; - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if (id == libwebm::kMkvCluster) - break; - - if (id == libwebm::kMkvCues) - break; - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume size field - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // pos now points to start of payload - - if (size == 0) - continue; - - if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvTimecode) { - len = static_cast(size); - - if ((pos + size) > avail) - return E_BUFFER_NOT_FULL; - - timecode = UnserializeUInt(pReader, pos, size); - - if (timecode < 0) // error (or underflow) - return static_cast(timecode); - - new_pos = pos + size; - - if (bBlock) - break; - } else if (id == libwebm::kMkvBlockGroup) { - bBlock = true; - break; - } else if (id == libwebm::kMkvSimpleBlock) { - bBlock = true; - break; - } - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } - - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if (timecode < 0) // no timecode found - return E_FILE_FORMAT_INVALID; - - if (!bBlock) - return E_FILE_FORMAT_INVALID; - - m_pos = new_pos; // designates position just beyond timecode payload - m_timecode = timecode; // m_timecode >= 0 means we're partially loaded - - if (cluster_size >= 0) - m_element_size = cluster_stop - m_element_start; - - return 0; -} - -long Cluster::Parse(long long& pos, long& len) const { - long status = Load(pos, len); - - if (status < 0) - return status; - - if (m_pos < m_element_start || m_timecode < 0) - return E_PARSE_FAILED; - - const long long cluster_stop = - (m_element_size < 0) ? -1 : m_element_start + m_element_size; - - if ((cluster_stop >= 0) && (m_pos >= cluster_stop)) - return 1; // nothing else to do - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && avail > total) - return E_FILE_FORMAT_INVALID; - - pos = m_pos; - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - break; - - if ((total >= 0) && (pos >= total)) { - if (m_element_size < 0) - m_element_size = pos - m_element_start; - - break; - } - - // Parse ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) { - if (m_element_size < 0) - m_element_size = pos - m_element_start; - - break; - } - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume size field - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // pos now points to start of payload - - if (size == 0) - continue; - - // const long long block_start = pos; - const long long block_stop = pos + size; - - if (cluster_stop >= 0) { - if (block_stop > cluster_stop) { - if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) { - return E_FILE_FORMAT_INVALID; - } - - pos = cluster_stop; - break; - } - } else if ((total >= 0) && (block_stop > total)) { - m_element_size = total - m_element_start; - pos = total; - break; - } else if (block_stop > avail) { - len = static_cast(size); - return E_BUFFER_NOT_FULL; - } - - Cluster* const this_ = const_cast(this); - - if (id == libwebm::kMkvBlockGroup) - return this_->ParseBlockGroup(size, pos, len); - - if (id == libwebm::kMkvSimpleBlock) - return this_->ParseSimpleBlock(size, pos, len); - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } - - if (m_element_size < 1) - return E_FILE_FORMAT_INVALID; - - m_pos = pos; - if (cluster_stop >= 0 && m_pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if (m_entries_count > 0) { - const long idx = m_entries_count - 1; - - const BlockEntry* const pLast = m_entries[idx]; - if (pLast == NULL) - return E_PARSE_FAILED; - - const Block* const pBlock = pLast->GetBlock(); - if (pBlock == NULL) - return E_PARSE_FAILED; - - const long long start = pBlock->m_start; - - if ((total >= 0) && (start > total)) - return E_PARSE_FAILED; // defend against trucated stream - - const long long size = pBlock->m_size; - - const long long stop = start + size; - if (cluster_stop >= 0 && stop > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (stop > total)) - return E_PARSE_FAILED; // defend against trucated stream - } - - return 1; // no more entries -} - -long Cluster::ParseSimpleBlock(long long block_size, long long& pos, - long& len) { - const long long block_start = pos; - const long long block_stop = pos + block_size; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - // parse track number - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long track = ReadUInt(pReader, pos, len); - - if (track < 0) // error - return static_cast(track); - - if (track == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((pos + 2) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 2) > avail) { - len = 2; - return E_BUFFER_NOT_FULL; - } - - pos += 2; // consume timecode - - if ((pos + 1) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - unsigned char flags; - - status = pReader->Read(pos, 1, &flags); - - if (status < 0) { // error or underflow - len = 1; - return status; - } - - ++pos; // consume flags byte - assert(pos <= avail); - - if (pos >= block_stop) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(flags & 0x06) >> 1; - - if ((lacing != 0) && (block_stop > avail)) { - len = static_cast(block_stop - pos); - return E_BUFFER_NOT_FULL; - } - - status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size, - 0); // DiscardPadding - - if (status != 0) - return status; - - m_pos = block_stop; - - return 0; // success -} - -long Cluster::ParseBlockGroup(long long payload_size, long long& pos, - long& len) { - const long long payload_start = pos; - const long long payload_stop = pos + payload_size; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - if ((total >= 0) && (payload_stop > total)) - return E_FILE_FORMAT_INVALID; - - if (payload_stop > avail) { - len = static_cast(payload_size); - return E_BUFFER_NOT_FULL; - } - - long long discard_padding = 0; - - while (pos < payload_stop) { - // parse sub-block element ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > payload_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - if (id == 0) // not a valid ID - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > payload_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume size field - - // pos now points to start of sub-block group payload - - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvDiscardPadding) { - status = UnserializeInt(pReader, pos, size, discard_padding); - - if (status < 0) // error - return status; - } - - if (id != libwebm::kMkvBlock) { - pos += size; // consume sub-part of block group - - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - const long long block_stop = pos + size; - - if (block_stop > payload_stop) - return E_FILE_FORMAT_INVALID; - - // parse track number - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long track = ReadUInt(pReader, pos, len); - - if (track < 0) // error - return static_cast(track); - - if (track == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((pos + 2) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 2) > avail) { - len = 2; - return E_BUFFER_NOT_FULL; - } - - pos += 2; // consume timecode - - if ((pos + 1) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - unsigned char flags; - - status = pReader->Read(pos, 1, &flags); - - if (status < 0) { // error or underflow - len = 1; - return status; - } - - ++pos; // consume flags byte - assert(pos <= avail); - - if (pos >= block_stop) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(flags & 0x06) >> 1; - - if ((lacing != 0) && (block_stop > avail)) { - len = static_cast(block_stop - pos); - return E_BUFFER_NOT_FULL; - } - - pos = block_stop; // consume block-part of block group - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != payload_stop) - return E_FILE_FORMAT_INVALID; - - status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size, - discard_padding); - if (status != 0) - return status; - - m_pos = payload_stop; - - return 0; // success -} - -long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const { - assert(m_pos >= m_element_start); - - pEntry = NULL; - - if (index < 0) - return -1; // generic error - - if (m_entries_count < 0) - return E_BUFFER_NOT_FULL; - - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count <= m_entries_size); - - if (index < m_entries_count) { - pEntry = m_entries[index]; - assert(pEntry); - - return 1; // found entry - } - - if (m_element_size < 0) // we don't know cluster end yet - return E_BUFFER_NOT_FULL; // underflow - - const long long element_stop = m_element_start + m_element_size; - - if (m_pos >= element_stop) - return 0; // nothing left to parse - - return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed -} - -Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) { - if (!pSegment || off < 0) - return NULL; - - const long long element_start = pSegment->m_start + off; - - Cluster* const pCluster = - new (std::nothrow) Cluster(pSegment, idx, element_start); - - return pCluster; -} - -Cluster::Cluster() - : m_pSegment(NULL), - m_element_start(0), - m_index(0), - m_pos(0), - m_element_size(0), - m_timecode(0), - m_entries(NULL), - m_entries_size(0), - m_entries_count(0) // means "no entries" -{} - -Cluster::Cluster(Segment* pSegment, long idx, long long element_start - /* long long element_size */) - : m_pSegment(pSegment), - m_element_start(element_start), - m_index(idx), - m_pos(element_start), - m_element_size(-1 /* element_size */), - m_timecode(-1), - m_entries(NULL), - m_entries_size(0), - m_entries_count(-1) // means "has not been parsed yet" -{} - -Cluster::~Cluster() { - if (m_entries_count <= 0) { - delete[] m_entries; - return; - } - - BlockEntry** i = m_entries; - BlockEntry** const j = m_entries + m_entries_count; - - while (i != j) { - BlockEntry* p = *i++; - assert(p); - - delete p; - } - - delete[] m_entries; -} - -bool Cluster::EOS() const { return (m_pSegment == NULL); } - -long Cluster::GetIndex() const { return m_index; } - -long long Cluster::GetPosition() const { - const long long pos = m_element_start - m_pSegment->m_start; - assert(pos >= 0); - - return pos; -} - -long long Cluster::GetElementSize() const { return m_element_size; } - -long Cluster::HasBlockEntries( - const Segment* pSegment, - long long off, // relative to start of segment payload - long long& pos, long& len) { - assert(pSegment); - assert(off >= 0); // relative to segment - - IMkvReader* const pReader = pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - pos = pSegment->m_start + off; // absolute - - if ((total >= 0) && (pos >= total)) - return 0; // we don't even have a complete cluster - - const long long segment_stop = - (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size; - - long long cluster_stop = -1; // interpreted later to mean "unknown size" - - { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + len) > total)) - return 0; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - if (id != libwebm::kMkvCluster) - return E_PARSE_FAILED; - - pos += len; // consume Cluster ID field - - // read size field - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + len) > total)) - return 0; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - if (size == 0) - return 0; // cluster does not have entries - - pos += len; // consume size field - - // pos now points to start of payload - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) { - cluster_stop = pos + size; - assert(cluster_stop >= 0); - - if ((segment_stop >= 0) && (cluster_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (cluster_stop > total)) - // return E_FILE_FORMAT_INVALID; //too conservative - return 0; // cluster does not have any entries - } - } - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - return 0; // no entries detected - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if (id == libwebm::kMkvCluster) - return 0; // no entries found - - if (id == libwebm::kMkvCues) - return 0; // no entries found - - pos += len; // consume id field - - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // read size field - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // underflow - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume size field - - // pos now points to start of payload - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // not supported inside cluster - - if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvBlockGroup) - return 1; // have at least one entry - - if (id == libwebm::kMkvSimpleBlock) - return 1; // have at least one entry - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } -} - -long long Cluster::GetTimeCode() const { - long long pos; - long len; - - const long status = Load(pos, len); - - if (status < 0) // error - return status; - - return m_timecode; -} - -long long Cluster::GetTime() const { - const long long tc = GetTimeCode(); - - if (tc < 0) - return tc; - - const SegmentInfo* const pInfo = m_pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long t = m_timecode * scale; - - return t; -} - -long long Cluster::GetFirstTime() const { - const BlockEntry* pEntry; - - const long status = GetFirst(pEntry); - - if (status < 0) // error - return status; - - if (pEntry == NULL) // empty cluster - return GetTime(); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - return pBlock->GetTime(this); -} - -long long Cluster::GetLastTime() const { - const BlockEntry* pEntry; - - const long status = GetLast(pEntry); - - if (status < 0) // error - return status; - - if (pEntry == NULL) // empty cluster - return GetTime(); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - return pBlock->GetTime(this); -} - -long Cluster::CreateBlock(long long id, - long long pos, // absolute pos of payload - long long size, long long discard_padding) { - if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock) - return E_PARSE_FAILED; - - if (m_entries_count < 0) { // haven't parsed anything yet - assert(m_entries == NULL); - assert(m_entries_size == 0); - - m_entries_size = 1024; - m_entries = new (std::nothrow) BlockEntry*[m_entries_size]; - if (m_entries == NULL) - return -1; - - m_entries_count = 0; - } else { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count <= m_entries_size); - - if (m_entries_count >= m_entries_size) { - const long entries_size = 2 * m_entries_size; - - BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size]; - if (entries == NULL) - return -1; - - BlockEntry** src = m_entries; - BlockEntry** const src_end = src + m_entries_count; - - BlockEntry** dst = entries; - - while (src != src_end) - *dst++ = *src++; - - delete[] m_entries; - - m_entries = entries; - m_entries_size = entries_size; - } - } - - if (id == libwebm::kMkvBlockGroup) - return CreateBlockGroup(pos, size, discard_padding); - else - return CreateSimpleBlock(pos, size); -} - -long Cluster::CreateBlockGroup(long long start_offset, long long size, - long long discard_padding) { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count >= 0); - assert(m_entries_count < m_entries_size); - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = start_offset; - const long long stop = start_offset + size; - - // For WebM files, there is a bias towards previous reference times - //(in order to support alt-ref frames, which refer back to the previous - // keyframe). Normally a 0 value is not possible, but here we tenatively - // allow 0 as the value of a reference frame, with the interpretation - // that this is a "previous" reference time. - - long long prev = 1; // nonce - long long next = 0; // nonce - long long duration = -1; // really, this is unsigned - - long long bpos = -1; - long long bsize = -1; - - while (pos < stop) { - long len; - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); // TODO - assert((pos + len) <= stop); - - pos += len; // consume size - - if (id == libwebm::kMkvBlock) { - if (bpos < 0) { // Block ID - bpos = pos; - bsize = size; - } - } else if (id == libwebm::kMkvBlockDuration) { - if (size > 8) - return E_FILE_FORMAT_INVALID; - - duration = UnserializeUInt(pReader, pos, size); - - if (duration < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvReferenceBlock) { - if (size > 8 || size <= 0) - return E_FILE_FORMAT_INVALID; - const long size_ = static_cast(size); - - long long time; - - long status = UnserializeInt(pReader, pos, size_, time); - assert(status == 0); - if (status != 0) - return -1; - - if (time <= 0) // see note above - prev = time; - else - next = time; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - if (bpos < 0) - return E_FILE_FORMAT_INVALID; - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - assert(bsize >= 0); - - const long idx = m_entries_count; - - BlockEntry** const ppEntry = m_entries + idx; - BlockEntry*& pEntry = *ppEntry; - - pEntry = new (std::nothrow) - BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding); - - if (pEntry == NULL) - return -1; // generic error - - BlockGroup* const p = static_cast(pEntry); - - const long status = p->Parse(); - - if (status == 0) { // success - ++m_entries_count; - return 0; - } - - delete pEntry; - pEntry = 0; - - return status; -} - -long Cluster::CreateSimpleBlock(long long st, long long sz) { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count >= 0); - assert(m_entries_count < m_entries_size); - - const long idx = m_entries_count; - - BlockEntry** const ppEntry = m_entries + idx; - BlockEntry*& pEntry = *ppEntry; - - pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz); - - if (pEntry == NULL) - return -1; // generic error - - SimpleBlock* const p = static_cast(pEntry); - - const long status = p->Parse(); - - if (status == 0) { - ++m_entries_count; - return 0; - } - - delete pEntry; - pEntry = 0; - - return status; -} - -long Cluster::GetFirst(const BlockEntry*& pFirst) const { - if (m_entries_count <= 0) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pFirst = NULL; - return status; - } - - if (m_entries_count <= 0) { // empty cluster - pFirst = NULL; - return 0; - } - } - - assert(m_entries); - - pFirst = m_entries[0]; - assert(pFirst); - - return 0; // success -} - -long Cluster::GetLast(const BlockEntry*& pLast) const { - for (;;) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pLast = NULL; - return status; - } - - if (status > 0) // no new block - break; - } - - if (m_entries_count <= 0) { - pLast = NULL; - return 0; - } - - assert(m_entries); - - const long idx = m_entries_count - 1; - - pLast = m_entries[idx]; - assert(pLast); - - return 0; -} - -long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const { - assert(pCurr); - assert(m_entries); - assert(m_entries_count > 0); - - size_t idx = pCurr->GetIndex(); - assert(idx < size_t(m_entries_count)); - assert(m_entries[idx] == pCurr); - - ++idx; - - if (idx >= size_t(m_entries_count)) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pNext = NULL; - return status; - } - - if (status > 0) { - pNext = NULL; - return 0; - } - - assert(m_entries); - assert(m_entries_count > 0); - assert(idx < size_t(m_entries_count)); - } - - pNext = m_entries[idx]; - assert(pNext); - - return 0; -} - -long Cluster::GetEntryCount() const { return m_entries_count; } - -const BlockEntry* Cluster::GetEntry(const Track* pTrack, - long long time_ns) const { - assert(pTrack); - - if (m_pSegment == NULL) // this is the special EOS cluster - return pTrack->GetEOS(); - - const BlockEntry* pResult = pTrack->GetEOS(); - - long index = 0; - - for (;;) { - if (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - assert(status >= 0); - - if (status > 0) // completely parsed, and no more entries - return pResult; - - if (status < 0) // should never happen - return 0; - - assert(m_entries); - assert(index < m_entries_count); - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if (pBlock->GetTrackNumber() != pTrack->GetNumber()) { - ++index; - continue; - } - - if (pTrack->VetEntry(pEntry)) { - if (time_ns < 0) // just want first candidate block - return pEntry; - - const long long ns = pBlock->GetTime(this); - - if (ns > time_ns) - return pResult; - - pResult = pEntry; // have a candidate - } else if (time_ns >= 0) { - const long long ns = pBlock->GetTime(this); - - if (ns > time_ns) - return pResult; - } - - ++index; - } -} - -const BlockEntry* Cluster::GetEntry(const CuePoint& cp, - const CuePoint::TrackPosition& tp) const { - assert(m_pSegment); - const long long tc = cp.GetTimeCode(); - - if (tp.m_block > 0) { - const long block = static_cast(tp.m_block); - const long index = block - 1; - - while (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) // TODO: can this happen? - return NULL; - - if (status > 0) // nothing remains to be parsed - return NULL; - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if ((pBlock->GetTrackNumber() == tp.m_track) && - (pBlock->GetTimeCode(this) == tc)) { - return pEntry; - } - } - - long index = 0; - - for (;;) { - if (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) // TODO: can this happen? - return NULL; - - if (status > 0) // nothing remains to be parsed - return NULL; - - assert(m_entries); - assert(index < m_entries_count); - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if (pBlock->GetTrackNumber() != tp.m_track) { - ++index; - continue; - } - - const long long tc_ = pBlock->GetTimeCode(this); - - if (tc_ < tc) { - ++index; - continue; - } - - if (tc_ > tc) - return NULL; - - const Tracks* const pTracks = m_pSegment->GetTracks(); - assert(pTracks); - - const long tn = static_cast(tp.m_track); - const Track* const pTrack = pTracks->GetTrackByNumber(tn); - - if (pTrack == NULL) - return NULL; - - const long long type = pTrack->GetType(); - - if (type == 2) // audio - return pEntry; - - if (type != 1) // not video - return NULL; - - if (!pBlock->IsKey()) - return NULL; - - return pEntry; - } -} - -BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {} -BlockEntry::~BlockEntry() {} -const Cluster* BlockEntry::GetCluster() const { return m_pCluster; } -long BlockEntry::GetIndex() const { return m_index; } - -SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start, - long long size) - : BlockEntry(pCluster, idx), m_block(start, size, 0) {} - -long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); } -BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; } -const Block* SimpleBlock::GetBlock() const { return &m_block; } - -BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start, - long long block_size, long long prev, long long next, - long long duration, long long discard_padding) - : BlockEntry(pCluster, idx), - m_block(block_start, block_size, discard_padding), - m_prev(prev), - m_next(next), - m_duration(duration) {} - -long BlockGroup::Parse() { - const long status = m_block.Parse(m_pCluster); - - if (status) - return status; - - m_block.SetKey((m_prev > 0) && (m_next <= 0)); - - return 0; -} - -BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; } -const Block* BlockGroup::GetBlock() const { return &m_block; } -long long BlockGroup::GetPrevTimeCode() const { return m_prev; } -long long BlockGroup::GetNextTimeCode() const { return m_next; } -long long BlockGroup::GetDurationTimeCode() const { return m_duration; } - -Block::Block(long long start, long long size_, long long discard_padding) - : m_start(start), - m_size(size_), - m_track(0), - m_timecode(-1), - m_flags(0), - m_frames(NULL), - m_frame_count(-1), - m_discard_padding(discard_padding) {} - -Block::~Block() { delete[] m_frames; } - -long Block::Parse(const Cluster* pCluster) { - if (pCluster == NULL) - return -1; - - if (pCluster->m_pSegment == NULL) - return -1; - - assert(m_start >= 0); - assert(m_size >= 0); - assert(m_track <= 0); - assert(m_frames == NULL); - assert(m_frame_count <= 0); - - long long pos = m_start; - const long long stop = m_start + m_size; - - long len; - - IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; - - m_track = ReadUInt(pReader, pos, len); - - if (m_track <= 0) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((stop - pos) < 2) - return E_FILE_FORMAT_INVALID; - - long status; - long long value; - - status = UnserializeInt(pReader, pos, 2, value); - - if (status) - return E_FILE_FORMAT_INVALID; - - if (value < SHRT_MIN) - return E_FILE_FORMAT_INVALID; - - if (value > SHRT_MAX) - return E_FILE_FORMAT_INVALID; - - m_timecode = static_cast(value); - - pos += 2; - - if ((stop - pos) <= 0) - return E_FILE_FORMAT_INVALID; - - status = pReader->Read(pos, 1, &m_flags); - - if (status) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(m_flags & 0x06) >> 1; - - ++pos; // consume flags byte - - if (lacing == 0) { // no lacing - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - m_frame_count = 1; - m_frames = new (std::nothrow) Frame[m_frame_count]; - if (m_frames == NULL) - return -1; - - Frame& f = m_frames[0]; - f.pos = pos; - - const long long frame_size = stop - pos; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = static_cast(frame_size); - - return 0; // success - } - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - unsigned char biased_count; - - status = pReader->Read(pos, 1, &biased_count); - - if (status) - return E_FILE_FORMAT_INVALID; - - ++pos; // consume frame count - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - m_frame_count = int(biased_count) + 1; - - m_frames = new (std::nothrow) Frame[m_frame_count]; - if (m_frames == NULL) - return -1; - - if (!m_frames) - return E_FILE_FORMAT_INVALID; - - if (lacing == 1) { // Xiph - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - long long size = 0; - int frame_count = m_frame_count; - - while (frame_count > 1) { - long frame_size = 0; - - for (;;) { - unsigned char val; - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - status = pReader->Read(pos, 1, &val); - - if (status) - return E_FILE_FORMAT_INVALID; - - ++pos; // consume xiph size byte - - frame_size += val; - - if (val < 255) - break; - } - - Frame& f = *pf++; - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - f.pos = 0; // patch later - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = frame_size; - size += frame_size; // contribution of this frame - - --frame_count; - } - - if (pf >= pf_end || pos > stop) - return E_FILE_FORMAT_INVALID; - - { - Frame& f = *pf++; - - if (pf != pf_end) - return E_FILE_FORMAT_INVALID; - - f.pos = 0; // patch later - - const long long total_size = stop - pos; - - if (total_size < size) - return E_FILE_FORMAT_INVALID; - - const long long frame_size = total_size - size; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = static_cast(frame_size); - } - - pf = m_frames; - while (pf != pf_end) { - Frame& f = *pf++; - assert((pos + f.len) <= stop); - - if ((pos + f.len) > stop) - return E_FILE_FORMAT_INVALID; - - f.pos = pos; - pos += f.len; - } - - assert(pos == stop); - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - } else if (lacing == 2) { // fixed-size lacing - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - const long long total_size = stop - pos; - - if ((total_size % m_frame_count) != 0) - return E_FILE_FORMAT_INVALID; - - const long long frame_size = total_size / m_frame_count; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - while (pf != pf_end) { - assert((pos + frame_size) <= stop); - if ((pos + frame_size) > stop) - return E_FILE_FORMAT_INVALID; - - Frame& f = *pf++; - - f.pos = pos; - f.len = static_cast(frame_size); - - pos += frame_size; - } - - assert(pos == stop); - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - } else { - assert(lacing == 3); // EBML lacing - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - long long size = 0; - int frame_count = m_frame_count; - - long long frame_size = ReadUInt(pReader, pos, len); - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - if (frame_size > LONG_MAX) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size of first frame - - if ((pos + frame_size) > stop) - return E_FILE_FORMAT_INVALID; - - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - { - Frame& curr = *pf; - - curr.pos = 0; // patch later - - curr.len = static_cast(frame_size); - size += curr.len; // contribution of this frame - } - - --frame_count; - - while (frame_count > 1) { - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - const Frame& prev = *pf++; - assert(prev.len == frame_size); - if (prev.len != frame_size) - return E_FILE_FORMAT_INVALID; - - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - Frame& curr = *pf; - - curr.pos = 0; // patch later - - const long long delta_size_ = ReadUInt(pReader, pos, len); - - if (delta_size_ < 0) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of (delta) size - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - const long exp = 7 * len - 1; - const long long bias = (1LL << exp) - 1LL; - const long long delta_size = delta_size_ - bias; - - frame_size += delta_size; - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - if (frame_size > LONG_MAX) - return E_FILE_FORMAT_INVALID; - - curr.len = static_cast(frame_size); - // Check if size + curr.len could overflow. - if (size > LLONG_MAX - curr.len) { - return E_FILE_FORMAT_INVALID; - } - size += curr.len; // contribution of this frame - - --frame_count; - } - - // parse last frame - if (frame_count > 0) { - if (pos > stop || pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - const Frame& prev = *pf++; - assert(prev.len == frame_size); - if (prev.len != frame_size) - return E_FILE_FORMAT_INVALID; - - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - Frame& curr = *pf++; - if (pf != pf_end) - return E_FILE_FORMAT_INVALID; - - curr.pos = 0; // patch later - - const long long total_size = stop - pos; - - if (total_size < size) - return E_FILE_FORMAT_INVALID; - - frame_size = total_size - size; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - curr.len = static_cast(frame_size); - } - - pf = m_frames; - while (pf != pf_end) { - Frame& f = *pf++; - if ((pos + f.len) > stop) - return E_FILE_FORMAT_INVALID; - - f.pos = pos; - pos += f.len; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; // success -} - -long long Block::GetTimeCode(const Cluster* pCluster) const { - if (pCluster == 0) - return m_timecode; - - const long long tc0 = pCluster->GetTimeCode(); - assert(tc0 >= 0); - - // Check if tc0 + m_timecode would overflow. - if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) { - return -1; - } - - const long long tc = tc0 + m_timecode; - - return tc; // unscaled timecode units -} - -long long Block::GetTime(const Cluster* pCluster) const { - assert(pCluster); - - const long long tc = GetTimeCode(pCluster); - - const Segment* const pSegment = pCluster->m_pSegment; - const SegmentInfo* const pInfo = pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - // Check if tc * scale could overflow. - if (tc != 0 && scale > LLONG_MAX / tc) { - return -1; - } - const long long ns = tc * scale; - - return ns; -} - -long long Block::GetTrackNumber() const { return m_track; } - -bool Block::IsKey() const { - return ((m_flags & static_cast(1 << 7)) != 0); -} - -void Block::SetKey(bool bKey) { - if (bKey) - m_flags |= static_cast(1 << 7); - else - m_flags &= 0x7F; -} - -bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); } - -Block::Lacing Block::GetLacing() const { - const int value = int(m_flags & 0x06) >> 1; - return static_cast(value); -} - -int Block::GetFrameCount() const { return m_frame_count; } - -const Block::Frame& Block::GetFrame(int idx) const { - assert(idx >= 0); - assert(idx < m_frame_count); - - const Frame& f = m_frames[idx]; - assert(f.pos > 0); - assert(f.len > 0); - - return f; -} - -long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const { - assert(pReader); - assert(buf); - - const long status = pReader->Read(pos, len, buf); - return status; -} - -long long Block::GetDiscardPadding() const { return m_discard_padding; } - -} // namespace mkvparser diff --git a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h deleted file mode 100644 index 6dce7e50ba..0000000000 --- a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h +++ /dev/null @@ -1,1145 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#ifndef MKVPARSER_MKVPARSER_H_ -#define MKVPARSER_MKVPARSER_H_ - -#include - -namespace mkvparser { - -const int E_PARSE_FAILED = -1; -const int E_FILE_FORMAT_INVALID = -2; -const int E_BUFFER_NOT_FULL = -3; - -class IMkvReader { - public: - virtual int Read(long long pos, long len, unsigned char* buf) = 0; - virtual int Length(long long* total, long long* available) = 0; - - public: - virtual ~IMkvReader(); -}; - -template -Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size); -long long GetUIntLength(IMkvReader*, long long, long&); -long long ReadUInt(IMkvReader*, long long, long&); -long long ReadID(IMkvReader* pReader, long long pos, long& len); -long long UnserializeUInt(IMkvReader*, long long pos, long long size); - -long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); -long UnserializeInt(IMkvReader*, long long pos, long long size, - long long& result); - -long UnserializeString(IMkvReader*, long long pos, long long size, char*& str); - -long ParseElementHeader(IMkvReader* pReader, - long long& pos, // consume id and size fields - long long stop, // if you know size of element's parent - long long& id, long long& size); - -bool Match(IMkvReader*, long long&, unsigned long, long long&); -bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); - -void GetVersion(int& major, int& minor, int& build, int& revision); - -struct EBMLHeader { - EBMLHeader(); - ~EBMLHeader(); - long long m_version; - long long m_readVersion; - long long m_maxIdLength; - long long m_maxSizeLength; - char* m_docType; - long long m_docTypeVersion; - long long m_docTypeReadVersion; - - long long Parse(IMkvReader*, long long&); - void Init(); -}; - -class Segment; -class Track; -class Cluster; - -class Block { - Block(const Block&); - Block& operator=(const Block&); - - public: - const long long m_start; - const long long m_size; - - Block(long long start, long long size, long long discard_padding); - ~Block(); - - long Parse(const Cluster*); - - long long GetTrackNumber() const; - long long GetTimeCode(const Cluster*) const; // absolute, but not scaled - long long GetTime(const Cluster*) const; // absolute, and scaled (ns) - bool IsKey() const; - void SetKey(bool); - bool IsInvisible() const; - - enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; - Lacing GetLacing() const; - - int GetFrameCount() const; // to index frames: [0, count) - - struct Frame { - long long pos; // absolute offset - long len; - - long Read(IMkvReader*, unsigned char*) const; - }; - - const Frame& GetFrame(int frame_index) const; - - long long GetDiscardPadding() const; - - private: - long long m_track; // Track::Number() - short m_timecode; // relative to cluster - unsigned char m_flags; - - Frame* m_frames; - int m_frame_count; - - protected: - const long long m_discard_padding; -}; - -class BlockEntry { - BlockEntry(const BlockEntry&); - BlockEntry& operator=(const BlockEntry&); - - protected: - BlockEntry(Cluster*, long index); - - public: - virtual ~BlockEntry(); - - bool EOS() const { return (GetKind() == kBlockEOS); } - const Cluster* GetCluster() const; - long GetIndex() const; - virtual const Block* GetBlock() const = 0; - - enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; - virtual Kind GetKind() const = 0; - - protected: - Cluster* const m_pCluster; - const long m_index; -}; - -class SimpleBlock : public BlockEntry { - SimpleBlock(const SimpleBlock&); - SimpleBlock& operator=(const SimpleBlock&); - - public: - SimpleBlock(Cluster*, long index, long long start, long long size); - long Parse(); - - Kind GetKind() const; - const Block* GetBlock() const; - - protected: - Block m_block; -}; - -class BlockGroup : public BlockEntry { - BlockGroup(const BlockGroup&); - BlockGroup& operator=(const BlockGroup&); - - public: - BlockGroup(Cluster*, long index, - long long block_start, // absolute pos of block's payload - long long block_size, // size of block's payload - long long prev, long long next, long long duration, - long long discard_padding); - - long Parse(); - - Kind GetKind() const; - const Block* GetBlock() const; - - long long GetPrevTimeCode() const; // relative to block's time - long long GetNextTimeCode() const; // as above - long long GetDurationTimeCode() const; - - private: - Block m_block; - const long long m_prev; - const long long m_next; - const long long m_duration; -}; - -/////////////////////////////////////////////////////////////// -// ContentEncoding element -// Elements used to describe if the track data has been encrypted or -// compressed with zlib or header stripping. -class ContentEncoding { - public: - enum { kCTR = 1 }; - - ContentEncoding(); - ~ContentEncoding(); - - // ContentCompression element names - struct ContentCompression { - ContentCompression(); - ~ContentCompression(); - - unsigned long long algo; - unsigned char* settings; - long long settings_len; - }; - - // ContentEncAESSettings element names - struct ContentEncAESSettings { - ContentEncAESSettings() : cipher_mode(kCTR) {} - ~ContentEncAESSettings() {} - - unsigned long long cipher_mode; - }; - - // ContentEncryption element names - struct ContentEncryption { - ContentEncryption(); - ~ContentEncryption(); - - unsigned long long algo; - unsigned char* key_id; - long long key_id_len; - unsigned char* signature; - long long signature_len; - unsigned char* sig_key_id; - long long sig_key_id_len; - unsigned long long sig_algo; - unsigned long long sig_hash_algo; - - ContentEncAESSettings aes_settings; - }; - - // Returns ContentCompression represented by |idx|. Returns NULL if |idx| - // is out of bounds. - const ContentCompression* GetCompressionByIndex(unsigned long idx) const; - - // Returns number of ContentCompression elements in this ContentEncoding - // element. - unsigned long GetCompressionCount() const; - - // Parses the ContentCompression element from |pReader|. |start| is the - // starting offset of the ContentCompression payload. |size| is the size in - // bytes of the ContentCompression payload. |compression| is where the parsed - // values will be stored. - long ParseCompressionEntry(long long start, long long size, - IMkvReader* pReader, - ContentCompression* compression); - - // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| - // is out of bounds. - const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; - - // Returns number of ContentEncryption elements in this ContentEncoding - // element. - unsigned long GetEncryptionCount() const; - - // Parses the ContentEncAESSettings element from |pReader|. |start| is the - // starting offset of the ContentEncAESSettings payload. |size| is the - // size in bytes of the ContentEncAESSettings payload. |encryption| is - // where the parsed values will be stored. - long ParseContentEncAESSettingsEntry(long long start, long long size, - IMkvReader* pReader, - ContentEncAESSettings* aes); - - // Parses the ContentEncoding element from |pReader|. |start| is the - // starting offset of the ContentEncoding payload. |size| is the size in - // bytes of the ContentEncoding payload. Returns true on success. - long ParseContentEncodingEntry(long long start, long long size, - IMkvReader* pReader); - - // Parses the ContentEncryption element from |pReader|. |start| is the - // starting offset of the ContentEncryption payload. |size| is the size in - // bytes of the ContentEncryption payload. |encryption| is where the parsed - // values will be stored. - long ParseEncryptionEntry(long long start, long long size, - IMkvReader* pReader, ContentEncryption* encryption); - - unsigned long long encoding_order() const { return encoding_order_; } - unsigned long long encoding_scope() const { return encoding_scope_; } - unsigned long long encoding_type() const { return encoding_type_; } - - private: - // Member variables for list of ContentCompression elements. - ContentCompression** compression_entries_; - ContentCompression** compression_entries_end_; - - // Member variables for list of ContentEncryption elements. - ContentEncryption** encryption_entries_; - ContentEncryption** encryption_entries_end_; - - // ContentEncoding element names - unsigned long long encoding_order_; - unsigned long long encoding_scope_; - unsigned long long encoding_type_; - - // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); - ContentEncoding(const ContentEncoding&); - ContentEncoding& operator=(const ContentEncoding&); -}; - -class Track { - Track(const Track&); - Track& operator=(const Track&); - - public: - class Info; - static long Create(Segment*, const Info&, long long element_start, - long long element_size, Track*&); - - enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 }; - - Segment* const m_pSegment; - const long long m_element_start; - const long long m_element_size; - virtual ~Track(); - - long GetType() const; - long GetNumber() const; - unsigned long long GetUid() const; - const char* GetNameAsUTF8() const; - const char* GetLanguage() const; - const char* GetCodecNameAsUTF8() const; - const char* GetCodecId() const; - const unsigned char* GetCodecPrivate(size_t&) const; - bool GetLacing() const; - unsigned long long GetDefaultDuration() const; - unsigned long long GetCodecDelay() const; - unsigned long long GetSeekPreRoll() const; - - const BlockEntry* GetEOS() const; - - struct Settings { - long long start; - long long size; - }; - - class Info { - public: - Info(); - ~Info(); - int Copy(Info&) const; - void Clear(); - long type; - long number; - unsigned long long uid; - unsigned long long defaultDuration; - unsigned long long codecDelay; - unsigned long long seekPreRoll; - char* nameAsUTF8; - char* language; - char* codecId; - char* codecNameAsUTF8; - unsigned char* codecPrivate; - size_t codecPrivateSize; - bool lacing; - Settings settings; - - private: - Info(const Info&); - Info& operator=(const Info&); - int CopyStr(char* Info::*str, Info&) const; - }; - - long GetFirst(const BlockEntry*&) const; - long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const; - virtual bool VetEntry(const BlockEntry*) const; - virtual long Seek(long long time_ns, const BlockEntry*&) const; - - const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; - unsigned long GetContentEncodingCount() const; - - long ParseContentEncodingsEntry(long long start, long long size); - - protected: - Track(Segment*, long long element_start, long long element_size); - - Info m_info; - - class EOSBlock : public BlockEntry { - public: - EOSBlock(); - - Kind GetKind() const; - const Block* GetBlock() const; - }; - - EOSBlock m_eos; - - private: - ContentEncoding** content_encoding_entries_; - ContentEncoding** content_encoding_entries_end_; -}; - -struct PrimaryChromaticity { - PrimaryChromaticity() : x(0), y(0) {} - ~PrimaryChromaticity() {} - static bool Parse(IMkvReader* reader, long long read_pos, - long long value_size, bool is_x, - PrimaryChromaticity** chromaticity); - float x; - float y; -}; - -struct MasteringMetadata { - static const float kValueNotPresent; - - MasteringMetadata() - : r(NULL), - g(NULL), - b(NULL), - white_point(NULL), - luminance_max(kValueNotPresent), - luminance_min(kValueNotPresent) {} - ~MasteringMetadata() { - delete r; - delete g; - delete b; - delete white_point; - } - - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, - MasteringMetadata** mastering_metadata); - - PrimaryChromaticity* r; - PrimaryChromaticity* g; - PrimaryChromaticity* b; - PrimaryChromaticity* white_point; - float luminance_max; - float luminance_min; -}; - -struct Colour { - static const long long kValueNotPresent; - - // Unless otherwise noted all values assigned upon construction are the - // equivalent of unspecified/default. - Colour() - : matrix_coefficients(kValueNotPresent), - bits_per_channel(kValueNotPresent), - chroma_subsampling_horz(kValueNotPresent), - chroma_subsampling_vert(kValueNotPresent), - cb_subsampling_horz(kValueNotPresent), - cb_subsampling_vert(kValueNotPresent), - chroma_siting_horz(kValueNotPresent), - chroma_siting_vert(kValueNotPresent), - range(kValueNotPresent), - transfer_characteristics(kValueNotPresent), - primaries(kValueNotPresent), - max_cll(kValueNotPresent), - max_fall(kValueNotPresent), - mastering_metadata(NULL) {} - ~Colour() { - delete mastering_metadata; - mastering_metadata = NULL; - } - - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, Colour** colour); - - long long matrix_coefficients; - long long bits_per_channel; - long long chroma_subsampling_horz; - long long chroma_subsampling_vert; - long long cb_subsampling_horz; - long long cb_subsampling_vert; - long long chroma_siting_horz; - long long chroma_siting_vert; - long long range; - long long transfer_characteristics; - long long primaries; - long long max_cll; - long long max_fall; - - MasteringMetadata* mastering_metadata; -}; - -struct Projection { - enum ProjectionType { - kTypeNotPresent = -1, - kRectangular = 0, - kEquirectangular = 1, - kCubeMap = 2, - kMesh = 3, - }; - static const float kValueNotPresent; - Projection() - : type(kTypeNotPresent), - private_data(NULL), - private_data_length(0), - pose_yaw(kValueNotPresent), - pose_pitch(kValueNotPresent), - pose_roll(kValueNotPresent) {} - ~Projection() { delete[] private_data; } - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, Projection** projection); - - ProjectionType type; - unsigned char* private_data; - size_t private_data_length; - float pose_yaw; - float pose_pitch; - float pose_roll; -}; - -class VideoTrack : public Track { - VideoTrack(const VideoTrack&); - VideoTrack& operator=(const VideoTrack&); - - VideoTrack(Segment*, long long element_start, long long element_size); - - public: - virtual ~VideoTrack(); - static long Parse(Segment*, const Info&, long long element_start, - long long element_size, VideoTrack*&); - - long long GetWidth() const; - long long GetHeight() const; - long long GetDisplayWidth() const; - long long GetDisplayHeight() const; - long long GetDisplayUnit() const; - long long GetStereoMode() const; - double GetFrameRate() const; - - bool VetEntry(const BlockEntry*) const; - long Seek(long long time_ns, const BlockEntry*&) const; - - Colour* GetColour() const; - - Projection* GetProjection() const; - - private: - long long m_width; - long long m_height; - long long m_display_width; - long long m_display_height; - long long m_display_unit; - long long m_stereo_mode; - - double m_rate; - - Colour* m_colour; - Projection* m_projection; -}; - -class AudioTrack : public Track { - AudioTrack(const AudioTrack&); - AudioTrack& operator=(const AudioTrack&); - - AudioTrack(Segment*, long long element_start, long long element_size); - - public: - static long Parse(Segment*, const Info&, long long element_start, - long long element_size, AudioTrack*&); - - double GetSamplingRate() const; - long long GetChannels() const; - long long GetBitDepth() const; - - private: - double m_rate; - long long m_channels; - long long m_bitDepth; -}; - -class Tracks { - Tracks(const Tracks&); - Tracks& operator=(const Tracks&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Tracks(Segment*, long long start, long long size, long long element_start, - long long element_size); - - ~Tracks(); - - long Parse(); - - unsigned long GetTracksCount() const; - - const Track* GetTrackByNumber(long tn) const; - const Track* GetTrackByIndex(unsigned long idx) const; - - private: - Track** m_trackEntries; - Track** m_trackEntriesEnd; - - long ParseTrackEntry(long long payload_start, long long payload_size, - long long element_start, long long element_size, - Track*&) const; -}; - -class Chapters { - Chapters(const Chapters&); - Chapters& operator=(const Chapters&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Chapters(Segment*, long long payload_start, long long payload_size, - long long element_start, long long element_size); - - ~Chapters(); - - long Parse(); - - class Atom; - class Edition; - - class Display { - friend class Atom; - Display(); - Display(const Display&); - ~Display(); - Display& operator=(const Display&); - - public: - const char* GetString() const; - const char* GetLanguage() const; - const char* GetCountry() const; - - private: - void Init(); - void ShallowCopy(Display&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - char* m_string; - char* m_language; - char* m_country; - }; - - class Atom { - friend class Edition; - Atom(); - Atom(const Atom&); - ~Atom(); - Atom& operator=(const Atom&); - - public: - unsigned long long GetUID() const; - const char* GetStringUID() const; - - long long GetStartTimecode() const; - long long GetStopTimecode() const; - - long long GetStartTime(const Chapters*) const; - long long GetStopTime(const Chapters*) const; - - int GetDisplayCount() const; - const Display* GetDisplay(int index) const; - - private: - void Init(); - void ShallowCopy(Atom&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - static long long GetTime(const Chapters*, long long timecode); - - long ParseDisplay(IMkvReader*, long long pos, long long size); - bool ExpandDisplaysArray(); - - char* m_string_uid; - unsigned long long m_uid; - long long m_start_timecode; - long long m_stop_timecode; - - Display* m_displays; - int m_displays_size; - int m_displays_count; - }; - - class Edition { - friend class Chapters; - Edition(); - Edition(const Edition&); - ~Edition(); - Edition& operator=(const Edition&); - - public: - int GetAtomCount() const; - const Atom* GetAtom(int index) const; - - private: - void Init(); - void ShallowCopy(Edition&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - long ParseAtom(IMkvReader*, long long pos, long long size); - bool ExpandAtomsArray(); - - Atom* m_atoms; - int m_atoms_size; - int m_atoms_count; - }; - - int GetEditionCount() const; - const Edition* GetEdition(int index) const; - - private: - long ParseEdition(long long pos, long long size); - bool ExpandEditionsArray(); - - Edition* m_editions; - int m_editions_size; - int m_editions_count; -}; - -class Tags { - Tags(const Tags&); - Tags& operator=(const Tags&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Tags(Segment*, long long payload_start, long long payload_size, - long long element_start, long long element_size); - - ~Tags(); - - long Parse(); - - class Tag; - class SimpleTag; - - class SimpleTag { - friend class Tag; - SimpleTag(); - SimpleTag(const SimpleTag&); - ~SimpleTag(); - SimpleTag& operator=(const SimpleTag&); - - public: - const char* GetTagName() const; - const char* GetTagString() const; - - private: - void Init(); - void ShallowCopy(SimpleTag&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - char* m_tag_name; - char* m_tag_string; - }; - - class Tag { - friend class Tags; - Tag(); - Tag(const Tag&); - ~Tag(); - Tag& operator=(const Tag&); - - public: - int GetSimpleTagCount() const; - const SimpleTag* GetSimpleTag(int index) const; - - private: - void Init(); - void ShallowCopy(Tag&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - long ParseSimpleTag(IMkvReader*, long long pos, long long size); - bool ExpandSimpleTagsArray(); - - SimpleTag* m_simple_tags; - int m_simple_tags_size; - int m_simple_tags_count; - }; - - int GetTagCount() const; - const Tag* GetTag(int index) const; - - private: - long ParseTag(long long pos, long long size); - bool ExpandTagsArray(); - - Tag* m_tags; - int m_tags_size; - int m_tags_count; -}; - -class SegmentInfo { - SegmentInfo(const SegmentInfo&); - SegmentInfo& operator=(const SegmentInfo&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - SegmentInfo(Segment*, long long start, long long size, - long long element_start, long long element_size); - - ~SegmentInfo(); - - long Parse(); - - long long GetTimeCodeScale() const; - long long GetDuration() const; // scaled - const char* GetMuxingAppAsUTF8() const; - const char* GetWritingAppAsUTF8() const; - const char* GetTitleAsUTF8() const; - - private: - long long m_timecodeScale; - double m_duration; - char* m_pMuxingAppAsUTF8; - char* m_pWritingAppAsUTF8; - char* m_pTitleAsUTF8; -}; - -class SeekHead { - SeekHead(const SeekHead&); - SeekHead& operator=(const SeekHead&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - SeekHead(Segment*, long long start, long long size, long long element_start, - long long element_size); - - ~SeekHead(); - - long Parse(); - - struct Entry { - Entry(); - - // the SeekHead entry payload - long long id; - long long pos; - - // absolute pos of SeekEntry ID - long long element_start; - - // SeekEntry ID size + size size + payload - long long element_size; - }; - - int GetCount() const; - const Entry* GetEntry(int idx) const; - - struct VoidElement { - // absolute pos of Void ID - long long element_start; - - // ID size + size size + payload size - long long element_size; - }; - - int GetVoidElementCount() const; - const VoidElement* GetVoidElement(int idx) const; - - private: - Entry* m_entries; - int m_entry_count; - - VoidElement* m_void_elements; - int m_void_element_count; - - static bool ParseEntry(IMkvReader*, - long long pos, // payload - long long size, Entry*); -}; - -class Cues; -class CuePoint { - friend class Cues; - - CuePoint(long, long long); - ~CuePoint(); - - CuePoint(const CuePoint&); - CuePoint& operator=(const CuePoint&); - - public: - long long m_element_start; - long long m_element_size; - - bool Load(IMkvReader*); - - long long GetTimeCode() const; // absolute but unscaled - long long GetTime(const Segment*) const; // absolute and scaled (ns units) - - struct TrackPosition { - long long m_track; - long long m_pos; // of cluster - long long m_block; - // codec_state //defaults to 0 - // reference = clusters containing req'd referenced blocks - // reftime = timecode of the referenced block - - bool Parse(IMkvReader*, long long, long long); - }; - - const TrackPosition* Find(const Track*) const; - - private: - const long m_index; - long long m_timecode; - TrackPosition* m_track_positions; - size_t m_track_positions_count; -}; - -class Cues { - friend class Segment; - - Cues(Segment*, long long start, long long size, long long element_start, - long long element_size); - ~Cues(); - - Cues(const Cues&); - Cues& operator=(const Cues&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - bool Find( // lower bound of time_ns - long long time_ns, const Track*, const CuePoint*&, - const CuePoint::TrackPosition*&) const; - - const CuePoint* GetFirst() const; - const CuePoint* GetLast() const; - const CuePoint* GetNext(const CuePoint*) const; - - const BlockEntry* GetBlock(const CuePoint*, - const CuePoint::TrackPosition*) const; - - bool LoadCuePoint() const; - long GetCount() const; // loaded only - // long GetTotal() const; //loaded + preloaded - bool DoneParsing() const; - - private: - bool Init() const; - bool PreloadCuePoint(long&, long long) const; - - mutable CuePoint** m_cue_points; - mutable long m_count; - mutable long m_preload_count; - mutable long long m_pos; -}; - -class Cluster { - friend class Segment; - - Cluster(const Cluster&); - Cluster& operator=(const Cluster&); - - public: - Segment* const m_pSegment; - - public: - static Cluster* Create(Segment*, - long index, // index in segment - long long off); // offset relative to segment - // long long element_size); - - Cluster(); // EndOfStream - ~Cluster(); - - bool EOS() const; - - long long GetTimeCode() const; // absolute, but not scaled - long long GetTime() const; // absolute, and scaled (nanosecond units) - long long GetFirstTime() const; // time (ns) of first (earliest) block - long long GetLastTime() const; // time (ns) of last (latest) block - - long GetFirst(const BlockEntry*&) const; - long GetLast(const BlockEntry*&) const; - long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; - - const BlockEntry* GetEntry(const Track*, long long ns = -1) const; - const BlockEntry* GetEntry(const CuePoint&, - const CuePoint::TrackPosition&) const; - // const BlockEntry* GetMaxKey(const VideoTrack*) const; - - // static bool HasBlockEntries(const Segment*, long long); - - static long HasBlockEntries(const Segment*, long long idoff, long long& pos, - long& size); - - long GetEntryCount() const; - - long Load(long long& pos, long& size) const; - - long Parse(long long& pos, long& size) const; - long GetEntry(long index, const mkvparser::BlockEntry*&) const; - - protected: - Cluster(Segment*, long index, long long element_start); - // long long element_size); - - public: - const long long m_element_start; - long long GetPosition() const; // offset relative to segment - - long GetIndex() const; - long long GetElementSize() const; - // long long GetPayloadSize() const; - - // long long Unparsed() const; - - private: - long m_index; - mutable long long m_pos; - // mutable long long m_size; - mutable long long m_element_size; - mutable long long m_timecode; - mutable BlockEntry** m_entries; - mutable long m_entries_size; - mutable long m_entries_count; - - long ParseSimpleBlock(long long, long long&, long&); - long ParseBlockGroup(long long, long long&, long&); - - long CreateBlock(long long id, long long pos, long long size, - long long discard_padding); - long CreateBlockGroup(long long start_offset, long long size, - long long discard_padding); - long CreateSimpleBlock(long long, long long); -}; - -class Segment { - friend class Cues; - friend class Track; - friend class VideoTrack; - - Segment(const Segment&); - Segment& operator=(const Segment&); - - private: - Segment(IMkvReader*, long long elem_start, - // long long elem_size, - long long pos, long long size); - - public: - IMkvReader* const m_pReader; - const long long m_element_start; - // const long long m_element_size; - const long long m_start; // posn of segment payload - const long long m_size; // size of segment payload - Cluster m_eos; // TODO: make private? - - static long long CreateInstance(IMkvReader*, long long, Segment*&); - ~Segment(); - - long Load(); // loads headers and all clusters - - // for incremental loading - // long long Unparsed() const; - bool DoneParsing() const; - long long ParseHeaders(); // stops when first cluster is found - // long FindNextCluster(long long& pos, long& size) const; - long LoadCluster(long long& pos, long& size); // load one cluster - long LoadCluster(); - - long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos, - long& size); - - const SeekHead* GetSeekHead() const; - const Tracks* GetTracks() const; - const SegmentInfo* GetInfo() const; - const Cues* GetCues() const; - const Chapters* GetChapters() const; - const Tags* GetTags() const; - - long long GetDuration() const; - - unsigned long GetCount() const; - const Cluster* GetFirst() const; - const Cluster* GetLast() const; - const Cluster* GetNext(const Cluster*); - - const Cluster* FindCluster(long long time_nanoseconds) const; - // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; - - const Cluster* FindOrPreloadCluster(long long pos); - - long ParseCues(long long cues_off, // offset relative to start of segment - long long& parse_pos, long& parse_len); - - private: - long long m_pos; // absolute file posn; what has been consumed so far - Cluster* m_pUnknownSize; - - SeekHead* m_pSeekHead; - SegmentInfo* m_pInfo; - Tracks* m_pTracks; - Cues* m_pCues; - Chapters* m_pChapters; - Tags* m_pTags; - Cluster** m_clusters; - long m_clusterCount; // number of entries for which m_index >= 0 - long m_clusterPreloadCount; // number of entries for which m_index < 0 - long m_clusterSize; // array size - - long DoLoadCluster(long long&, long&); - long DoLoadClusterUnknownSize(long long&, long&); - long DoParseNext(const Cluster*&, long long&, long&); - - bool AppendCluster(Cluster*); - bool PreloadCluster(Cluster*, ptrdiff_t); - - // void ParseSeekHead(long long pos, long long size); - // void ParseSeekEntry(long long pos, long long size); - // void ParseCues(long long); - - const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&); -}; - -} // namespace mkvparser - -inline long mkvparser::Segment::LoadCluster() { - long long pos; - long size; - - return LoadCluster(pos, size); -} - -#endif // MKVPARSER_MKVPARSER_H_ diff --git a/thirdparty/libvpx/AUTHORS b/thirdparty/libvpx/AUTHORS deleted file mode 100644 index fcd5c534a8..0000000000 --- a/thirdparty/libvpx/AUTHORS +++ /dev/null @@ -1,142 +0,0 @@ -# This file is automatically generated from the git commit history -# by tools/gen_authors.sh. - -Aaron Watry -Abo Talib Mahfoodh -Adam Xu -Adrian Grange -Aℓex Converse -Ahmad Sharif -Alexander Voronov -Alexis Ballier -Alok Ahuja -Alpha Lam -A.Mahfoodh -Ami Fischman -Andoni Morales Alastruey -Andres Mejia -Andrew Russell -Angie Chiang -Aron Rosenberg -Attila Nagy -Brion Vibber -changjun.yang -Charles 'Buck' Krasic -chm -Christian Duvivier -Daniele Castagna -Daniel Kang -Deb Mukherjee -Dim Temp -Dmitry Kovalev -Dragan Mrdjan -Ed Baker -Ehsan Akhgari -Erik Niemeyer -Fabio Pedretti -Frank Galligan -Fredrik Söderquist -Fritz Koenig -Gaute Strokkenes -Geza Lore -Ghislain MARY -Giuseppe Scrivano -Gordana Cmiljanovic -Guillaume Martres -Guillermo Ballester Valor -Hangyu Kuang -Hanno Böck -Henrik Lundin -Hui Su -Ivan Maltz -Jacek Caban -Jacky Chen -James Berry -James Yu -James Zern -Jan Gerber -Jan Kratochvil -Janne Salonen -Jean-Yves Avenard -Jeff Faust -Jeff Muizelaar -Jeff Petkau -Jia Jia -Jian Zhou -Jim Bankoski -Jingning Han -Joey Parrish -Johann Koenig -John Koleszar -Johnny Klonaris -John Stark -Joshua Bleecher Snyder -Joshua Litt -Julia Robson -Justin Clift -Justin Lebar -KO Myung-Hun -Lawrence Velázquez -Linfeng Zhang -Lou Quillio -Luca Barbato -Makoto Kato -Mans Rullgard -Marco Paniconi -Mark Mentovai -Martin Ettl -Martin Storsjo -Matthew Heaney -Michael Kohler -Mike Frysinger -Mike Hommey -Mikhal Shemer -Minghai Shang -Morton Jonuschat -Nico Weber -Parag Salasakar -Pascal Massimino -Patrik Westin -Paul Wilkins -Pavol Rusnak -Paweł Hajdan -Pengchong Jin -Peter de Rivaz -Philip Jägenstedt -Priit Laes -Rafael Ávila de Espíndola -Rafaël Carré -Ralph Giles -Rob Bradford -Ronald S. Bultje -Rui Ueyama -Sami Pietilä -Sasi Inguva -Scott Graham -Scott LaVarnway -Sean McGovern -Sergey Kolomenkin -Sergey Ulanov -Shimon Doodkin -Shunyao Li -Stefan Holmer -Suman Sunkara -Taekhyun Kim -Takanori MATSUURA -Tamar Levy -Tao Bai -Tero Rintaluoma -Thijs Vermeir -Tim Kopp -Timothy B. Terriberry -Tom Finegan -Vignesh Venkatasubramanian -Yaowu Xu -Yi Luo -Yongzhe Wang -Yunqing Wang -Yury Gitman -Zoe Liu -Google Inc. -The Mozilla Foundation -The Xiph.Org Foundation diff --git a/thirdparty/libvpx/CHANGELOG b/thirdparty/libvpx/CHANGELOG deleted file mode 100644 index 795d395f96..0000000000 --- a/thirdparty/libvpx/CHANGELOG +++ /dev/null @@ -1,654 +0,0 @@ -2016-07-20 v1.6.0 "Khaki Campbell Duck" - This release improves upon the VP9 encoder and speeds up the encoding and - decoding processes. - - - Upgrading: - This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum - in vpx_image and some minor changes to the VP8_COMP structure. - - The default key frame interval for VP9 has changed from 128 to 9999. - - - Enhancement: - A core focus has been performance for low end Intel processors. SSSE3 - instructions such as 'pshufb' have been avoided and instructions have been - reordered to better accommodate the more constrained pipelines. - - As a result, devices based on Celeron processors have seen substantial - decoding improvements. From Indian Runner Duck to Javan Whistling Duck, - decoding speed improved between 10 and 30%. Between Javan Whistling Duck - and Khaki Campbell Duck, it improved another 10 to 15%. - - While Celeron benefited most, Core-i5 also improved 5% and 10% between the - respective releases. - - Realtime performance for WebRTC for both speed and quality has received a - lot of attention. - - - Bug Fixes: - A number of fuzzing issues, found variously by Mozilla, Chromium and others, - have been fixed and we strongly recommend updating. - -2015-11-09 v1.5.0 "Javan Whistling Duck" - This release improves upon the VP9 encoder and speeds up the encoding and - decoding processes. - - - Upgrading: - This release is ABI incompatible with 1.4.0. It drops deprecated VP8 - controls and adds a variety of VP9 controls for testing. - - The vpxenc utility now prefers VP9 by default. - - - Enhancements: - Faster VP9 encoding and decoding - Smaller library size by combining functions used by VP8 and VP9 - - - Bug Fixes: - A variety of fuzzing issues - -2015-04-03 v1.4.0 "Indian Runner Duck" - This release includes significant improvements to the VP9 codec. - - - Upgrading: - This release is ABI incompatible with 1.3.0. It drops the compatibility - layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec - controls for VP9. - - - Enhancements: - Faster VP9 encoding and decoding - Multithreaded VP9 decoding (tile and frame-based) - Multithreaded VP9 encoding - on by default - YUV 4:2:2 and 4:4:4 support in VP9 - 10 and 12bit support in VP9 - 64bit ARM support by replacing ARM assembly with intrinsics - - - Bug Fixes: - Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0 - files. - - - Known Issues: - Frame Parallel decoding fails for segmented and non-420 files. - -2013-11-15 v1.3.0 "Forest" - This release introduces the VP9 codec in a backward-compatible way. - All existing users of VP8 can continue to use the library without - modification. However, some VP8 options do not map to VP9 in the same manner. - - The VP9 encoder in this release is not feature complete. Users interested in - the encoder are advised to use the git master branch and discuss issues on - libvpx mailing lists. - - - Upgrading: - This release is ABI and API compatible with Duclair (v1.0.0). Users - of older releases should refer to the Upgrading notes in this document - for that release. - - - Enhancements: - Get rid of bashisms in the main build scripts - Added usage info on command line options - Add lossless compression mode - Dll build of libvpx - Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13) - Add option to disable documentation - configure: add --enable-external-build support - make: support V=1 as short form of verbose=yes - configure: support mingw-w64 - configure: support hardfloat armv7 CHOSTS - configure: add support for android x86 - Add estimated completion time to vpxenc - Don't exit on decode errors in vpxenc - vpxenc: support scaling prior to encoding - vpxdec: support scaling output - vpxenc: improve progress indicators with --skip - msvs: Don't link to winmm.lib - Add a new script for producing vcxproj files - Produce Visual Studio 10 and 11 project files - Produce Windows Phone project files - msvs-build: use msbuild for vs >= 2005 - configure: default configure log to config.log - Add encoding option --static-thresh - - - Speed: - Miscellaneous speed optimizations for VP8 and VP9. - - - Quality: - In general, quality is consistent with the Eider release. - - - Bug Fixes: - This release represents approximately a year of engineering effort, - and contains multiple bug fixes. Please refer to git history for details. - - -2012-12-21 v1.2.0 - This release acts as a checkpoint for a large amount of internal refactoring - and testing. It also contains a number of small bugfixes, so all users are - encouraged to upgrade. - - - Upgrading: - This release is ABI and API compatible with Duclair (v1.0.0). Users - of older releases should refer to the Upgrading notes in this - document for that release. - - - Enhancements: - VP8 optimizations for MIPS dspr2 - vpxenc: add -quiet option - - - Speed: - Encoder and decoder speed is consistent with the Eider release. - - - Quality: - In general, quality is consistent with the Eider release. - - Minor tweaks to ARNR filtering - Minor improvements to real time encoding with multiple temporal layers - - - Bug Fixes: - Fixes multithreaded encoder race condition in loopfilter - Fixes multi-resolution threaded encoding - Fix potential encoder dead-lock after picture resize - - -2012-05-09 v1.1.0 "Eider" - This introduces a number of enhancements, mostly focused on real-time - encoding. In addition, it fixes a decoder bug (first introduced in - Duclair) so all users of that release are encouraged to upgrade. - - - Upgrading: - This release is ABI and API compatible with Duclair (v1.0.0). Users - of older releases should refer to the Upgrading notes in this - document for that release. - - This release introduces a new temporal denoiser, controlled by the - VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not - currently take a strength parameter, so the control is effectively - a boolean - zero (off) or non-zero (on). For compatibility with - existing applications, the values accepted are the same as those - for the spatial denoiser (0-6). The temporal denoiser is enabled - by default, and the older spatial denoiser may be restored by - configuring with --disable-temporal-denoising. The temporal denoiser - is more computationally intensive than the spatial one. - - This release removes support for a legacy, decode only API that was - supported, but deprecated, at the initial release of libvpx - (v0.9.0). This is not expected to have any impact. If you are - impacted, you can apply a reversion to commit 2bf8fb58 locally. - Please update to the latest libvpx API if you are affected. - - - Enhancements: - Adds a motion compensated temporal denoiser to the encoder, which - gives higher quality than the older spatial denoiser. (See above - for notes on upgrading). - - In addition, support for new compilers and platforms were added, - including: - improved support for XCode - Android x86 NDK build - OS/2 support - SunCC support - - Changing resolution with vpx_codec_enc_config_set() is now - supported. Previously, reinitializing the codec was required to - change the input resolution. - - The vpxenc application has initial support for producing multiple - encodes from the same input in one call. Resizing is not yet - supported, but varying other codec parameters is. Use -- to - delineate output streams. Options persist from one stream to the - next. - - Also, the vpxenc application will now use a keyframe interval of - 5 seconds by default. Use the --kf-max-dist option to override. - - - Speed: - Decoder performance improved 2.5% versus Duclair. Encoder speed is - consistent with Duclair for most material. Two pass encoding of - slideshow-like material will see significant improvements. - - Large realtime encoding speed gains at a small quality expense are - possible by configuring the on-the-fly bitpacking experiment with - --enable-onthefly-bitpacking. Realtime encoder can be up to 13% - faster (ARM) depending on the number of threads and bitrate - settings. This technique sees constant gain over the 5-16 speed - range. For VC style input the loss seen is up to 0.2dB. See commit - 52cf4dca for further details. - - - Quality: - On the whole, quality is consistent with the Duclair release. Some - tweaks: - - Reduced blockiness in easy sections by applying a penalty to - intra modes. - - Improved quality of static sections (like slideshows) with - two pass encoding. - - Improved keyframe sizing with multiple temporal layers - - - Bug Fixes: - Corrected alt-ref contribution to frame rate for visible updates - to the alt-ref buffer. This affected applications making manual - usage of the frame reference flags, or temporal layers. - - Additional constraints were added to disable multi-frame quality - enhancement (MFQE) in sections of the frame where there is motion. - (#392) - - Fixed corruption issues when vpx_codec_enc_config_set() was called - with spatial resampling enabled. - - Fixed a decoder error introduced in Duclair where the segmentation - map was not being reinitialized on keyframes (#378) - - -2012-01-27 v1.0.0 "Duclair" - Our fourth named release, focused on performance and features related to - real-time encoding. It also fixes a decoder crash bug introduced in - v0.9.7, so all users of that release are encouraged to upgrade. - - - Upgrading: - This release is ABI incompatible with prior releases of libvpx, so the - "major" version number has been bumped to 1. You must recompile your - applications against the latest version of the libvpx headers. The - API remains compatible, and this should not require code changes in most - applications. - - - Enhancements: - This release introduces several substantial new features to the encoder, - of particular interest to real time streaming applications. - - Temporal scalability allows the encoder to produce a stream that can - be decimated to different frame rates, with independent rate targetting - for each substream. - - Multiframe quality enhancement postprocessing can make visual quality - more consistent in the presence of frames that are substantially - different quality than the surrounding frames, as in the temporal - scalability case and in some forced keyframe scenarios. - - Multiple-resolution encoding support allows the encoding of the - same content at different resolutions faster than encoding them - separately. - - - Speed: - Optimization targets for this release included the decoder and the real- - time modes of the encoder. Decoder speed on x86 has improved 10.5% with - this release. Encoder improvements followed a curve where speeds 1-3 - improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved - 1.5% to 10.5%, respectively. "Best" mode speed is consistent with the - Cayuga release. - - - Quality: - Encoder quality in the single stream case is consistent with the Cayuga - release. - - - Bug Fixes: - This release fixes an OOB read decoder crash bug present in v0.9.7 - related to the clamping of motion vectors in SPLITMV blocks. This - behavior could be triggered by corrupt input or by starting - decoding from a P-frame. - - -2011-08-15 v0.9.7-p1 "Cayuga" patch 1 - This is an incremental bugfix release against Cayuga. All users of that - release are strongly encouraged to upgrade. - - - Fix potential OOB reads (cdae03a) - - An unbounded out of bounds read was discovered when the - decoder was requested to perform error concealment (new in - Cayuga) given a frame with corrupt partition sizes. - - A bounded out of bounds read was discovered affecting all - versions of libvpx. Given an multipartition input frame that - is truncated between the mode/mv partition and the first - residiual paritition (in the block of partition offsets), up - to 3 extra bytes could have been read from the source buffer. - The code will not take any action regardless of the contents - of these undefined bytes, as the truncated buffer is detected - immediately following the read based on the calculated - starting position of the coefficient partition. - - - Fix potential error concealment crash when the very first frame - is missing or corrupt (a609be5) - - - Fix significant artifacts in error concealment (a4c2211, 99d870a) - - - Revert 1-pass CBR rate control changes (e961317) - Further testing showed this change produced undesirable visual - artifacts, rolling back for now. - - -2011-08-02 v0.9.7 "Cayuga" - Our third named release, focused on a faster, higher quality, encoder. - - - Upgrading: - This release is backwards compatible with Aylesbury (v0.9.5) and - Bali (v0.9.6). Users of older releases should refer to the Upgrading - notes in this document for that release. - - - Enhancements: - Stereo 3D format support for vpxenc - Runtime detection of available processor cores. - Allow specifying --end-usage by enum name - vpxdec: test for frame corruption - vpxenc: add quantizer histogram display - vpxenc: add rate histogram display - Set VPX_FRAME_IS_DROPPABLE - update configure for ios sdk 4.3 - Avoid text relocations in ARM vp8 decoder - Generate a vpx.pc file for pkg-config. - New ways of passing encoded data between encoder and decoder. - - - Speed: - This release includes across-the-board speed improvements to the - encoder. On x86, these measure at approximately 11.5% in Best mode, - 21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6). - On ARM Cortex A9 with Neon extensions, real-time encoding of video - telephony content is 35% faster than Bali on single core and 48% - faster on multi-core. On the NVidia Tegra2 platform, real time - encoding is 40% faster than Bali. - - Decoder speed was not a priority for this release, but improved - approximately 8.4% on x86. - - Reduce motion vector search on alt-ref frame. - Encoder loopfilter running in its own thread - Reworked loopfilter to precalculate more parameters - SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}(). - Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3. - Removed redundant checks - Reduced structure sizes - utilize preload in ARMv6 MC/LPF/Copy routines - ARM optimized quantization, dfct, variance, subtract - Increase chrow row alignment to 16 bytes. - disable trellis optimization for first pass - Write SSSE3 sub-pixel filter function - Improve SSE2 half-pixel filter funtions - Add vp8_sub_pixel_variance16x8_ssse3 function - Reduce unnecessary distortion computation - Use diamond search to replace full search - Preload reference area in sub-pixel motion search (real-time mode) - - - Quality: - This release focused primarily on one-pass use cases, including - video conferencing. Low latency data rate control was significantly - improved, improving streamability over bandwidth constrained links. - Added support for error concealment, allowing frames to maintain - visual quality in the presence of substantial packet loss. - - Add rc_max_intra_bitrate_pct control - Limit size of initial keyframe in one-pass. - Improve framerate adaptation - Improved 1-pass CBR rate control - Improved KF insertion after fades to still. - Improved key frame detection. - Improved activity masking (lower PSNR impact for same SSIM boost) - Improved interaction between GF and ARFs - Adding error-concealment to the decoder. - Adding support for independent partitions - Adjusted rate-distortion constants - - - - Bug Fixes: - Removed firstpass motion map - Fix parallel make install - Fix multithreaded encoding for 1 MB wide frame - Fixed iwalsh_neon build problems with RVDS4.1 - Fix semaphore emulation, spin-wait intrinsics on Windows - Fix build with xcode4 and simplify GLOBAL. - Mark ARM asm objects as allowing a non-executable stack. - Fix vpxenc encoding incorrect webm file header on big endian - - -2011-03-07 v0.9.6 "Bali" - Our second named release, focused on a faster, higher quality, encoder. - - - Upgrading: - This release is backwards compatible with Aylesbury (v0.9.5). Users - of older releases should refer to the Upgrading notes in this - document for that release. - - - Enhancements: - vpxenc --psnr shows a summary when encode completes - --tune=ssim option to enable activity masking - improved postproc visualizations for development - updated support for Apple iOS to SDK 4.2 - query decoder to determine which reference frames were updated - implemented error tracking in the decoder - fix pipe support on windows - - - Speed: - Primary focus was on good quality mode, speed 0. Average improvement - on x86 about 40%, up to 100% on user-generated content at that speed. - Best quality mode speed improved 35%, and realtime speed 10-20%. This - release also saw significant improvement in realtime encoding speed - on ARM platforms. - - Improved encoder threading - Dont pick encoder filter level when loopfilter is disabled. - Avoid double copying of key frames into alt and golden buffer - FDCT optimizations. - x86 sse2 temporal filter - SSSE3 version of fast quantizer - vp8_rd_pick_best_mbsegmentation code restructure - Adjusted breakout RD for SPLITMV - Changed segmentation check order - Improved rd_pick_intra4x4block - Adds armv6 optimized variance calculation - ARMv6 optimized sad16x16 - ARMv6 optimized half pixel variance calculations - Full search SAD function optimization in SSE4.1 - Improve MV prediction accuracy to achieve performance gain - Improve MV prediction in vp8_pick_inter_mode() for speed>3 - - - Quality: - Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release - also includes support for "activity masking," which greatly improves - SSIM at the expense of PSNR. For now, this feature is available with - the --tune=ssim option. Further experimentation in this area - is ongoing. This release also introduces a new rate control mode - called "CQ," which changes the allocation of bits within a clip to - the sections where they will have the most visual impact. - - Tuning for the more exact quantizer. - Relax rate control for last few frames - CQ Mode - Limit key frame quantizer for forced key frames. - KF/GF Pulsing - Add simple version of activity masking. - make rdmult adaptive for intra in quantizer RDO - cap the best quantizer for 2nd order DC - change the threshold of DC check for encode breakout - - - Bug Fixes: - Fix crash on Sparc Solaris. - Fix counter of fixed keyframe distance - ARNR filter pointer update bug fix - Fixed use of motion percentage in KF/GF group calc - Changed condition for using RD in Intra Mode - Fix encoder real-time only configuration. - Fix ARM encoder crash with multiple token partitions - Fixed bug first cluster timecode of webm file is wrong. - Fixed various encoder bugs with odd-sized images - vp8e_get_preview fixed when spatial resampling enabled - quantizer: fix assertion in fast quantizer path - Allocate source buffers to be multiples of 16 - Fix for manual Golden frame frequency - Fix drastic undershoot in long form content - - -2010-10-28 v0.9.5 "Aylesbury" - Our first named release, focused on a faster decoder, and a better encoder. - - - Upgrading: - This release incorporates backwards-incompatible changes to the - ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec. - - vpxdec - * the -q (quiet) option has been removed, and replaced with - -v (verbose). the output is quiet by default. Use -v to see - the version number of the binary. - - * The default behavior is now to write output to a single file - instead of individual frames. The -y option has been removed. - Y4M output is the default. - - * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12 - options must be specified. - - $ ivfdec -o OUTPUT INPUT - $ vpxdec --i420 -o OUTPUT INPUT - - * If an output file is not specified, the default is to write - Y4M to stdout. This makes piping more natural. - - $ ivfdec -y -o - INPUT | ... - $ vpxdec INPUT | ... - - * The output file has additional flexibility for formatting the - filename. It supports escape characters for constructing a - filename from the width, height, and sequence number. This - replaces the -p option. To get the equivalent: - - $ ivfdec -p frame INPUT - $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT - - vpxenc - * The output file must be specified with -o, rather than as the - last argument. - - $ ivfenc INPUT OUTPUT - $ vpxenc -o OUTPUT INPUT - - * The output defaults to webm. To get IVF output, use the --ivf - option. - - $ ivfenc INPUT OUTPUT.ivf - $ vpxenc -o OUTPUT.ivf --ivf INPUT - - - - Enhancements: - ivfenc and ivfdec have been renamed to vpxenc, vpxdec. - vpxdec supports .webm input - vpxdec writes .y4m by default - vpxenc writes .webm output by default - vpxenc --psnr now shows the average/overall PSNR at the end - ARM platforms now support runtime cpu detection - vpxdec visualizations added for motion vectors, block modes, references - vpxdec now silent by default - vpxdec --progress shows frame-by-frame timing information - vpxenc supports the distinction between --fps and --timebase - NASM is now a supported assembler - configure: enable PIC for shared libs by default - configure: add --enable-small - configure: support for ppc32-linux-gcc - configure: support for sparc-solaris-gcc - - - Bugs: - Improve handling of invalid frames - Fix valgrind errors in the NEON loop filters. - Fix loopfilter delta zero transitions - Fix valgrind errors in vp8_sixtap_predict8x4_armv6(). - Build fixes for darwin-icc - - - Speed: - 20-40% (average 28%) improvement in libvpx decoder speed, - including: - Rewrite vp8_short_walsh4x4_sse2() - Optimizations on the loopfilters. - Miscellaneous improvements for Atom - Add 4-tap version of 2nd-pass ARMv6 MC filter. - Improved multithread utilization - Better instruction choices on x86 - reorder data to use wider instructions - Update NEON wide idcts - Make block access to frame buffer sequential - Improved subset block search - Bilinear subpixel optimizations for ssse3. - Decrease memory footprint - - Encoder speed improvements (percentage gain not measured): - Skip unnecessary search of identical frames - Add SSE2 subtract functions - Improve bounds checking in vp8_diamond_search_sadx4() - Added vp8_fast_quantize_b_sse2 - - - Quality: - Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality - encoding mode, and up to 60% improvement on very noisy, still - or slow moving source video - - Motion compensated temporal filter for Alt-Ref Noise Reduction - Improved use of trellis quantization on 2nd order Y blocks - Tune effect of motion on KF/GF boost in two pass - Allow coefficient optimization for good quality speed 0. - Improved control of active min quantizer for two pass. - Enable ARFs for non-lagged compress - -2010-09-02 v0.9.2 - - Enhancements: - Disable frame dropping by default - Improved multithreaded performance - Improved Force Key Frame Behaviour - Increased rate control buffer level precision - Fix bug in 1st pass motion compensation - ivfenc: correct fixed kf interval, --disable-kf - - Speed: - Changed above and left context data layout - Rework idct calling structure. - Removed unnecessary MB_MODE_INFO copies - x86: SSSE3 sixtap prediction - Reworked IDCT to include reconstruction (add) step - Swap alt/gold/new/last frame buffer ptrs instead of copying. - Improve SSE2 loopfilter functions - Change bitreader to use a larger window. - Avoid loopfilter reinitialization when possible - - Quality: - Normalize quantizer's zero bin and rounding factors - Add trellis quantization. - Make the quantizer exact. - Updates to ARNR filtering algorithm - Fix breakout thresh computation for golden & AltRef frames - Redo the forward 4x4 dct - Improve the accuracy of forward walsh-hadamard transform - Further adjustment of RD behaviour with Q and Zbin. - - Build System: - Allow linking of libs built with MinGW to MSVC - Fix target auto-detection on mingw32 - Allow --cpu= to work for x86. - configure: pass original arguments through to make dist - Fix builds without runtime CPU detection - msvs: fix install of codec sources - msvs: Change devenv.com command line for better msys support - msvs: Add vs9 targets. - Add x86_64-linux-icc target - - Bugs: - Potential crashes on older MinGW builds - Fix two-pass framrate for Y4M input. - Fixed simple loop filter, other crashes on ARM v6 - arm: fix missing dependency with --enable-shared - configure: support directories containing .o - Replace pinsrw (SSE) with MMX instructions - apple: include proper mach primatives - Fixed rate control bug with long key frame interval. - Fix DSO link errors on x86-64 when not using a version script - Fixed buffer selection for UV in AltRef filtering - - -2010-06-17 v0.9.1 - - Enhancements: - * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O - * Speed optimizations - - Bugfixes: - * Rate control - * Prevent out-of-bounds accesses on invalid data - - Build system updates: - * Detect toolchain to be used automatically for native builds - * Support building shared libraries - * Better autotools emulation (--prefix, --libdir, DESTDIR) - - Updated LICENSE - * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html - - -2010-05-18 v0.9.0 - - Initial open source release. Welcome to WebM and VP8! - diff --git a/thirdparty/libvpx/LICENSE b/thirdparty/libvpx/LICENSE deleted file mode 100644 index 1ce44343c4..0000000000 --- a/thirdparty/libvpx/LICENSE +++ /dev/null @@ -1,31 +0,0 @@ -Copyright (c) 2010, The WebM Project authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google, nor the WebM Project, nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/thirdparty/libvpx/PATENTS b/thirdparty/libvpx/PATENTS deleted file mode 100644 index caedf607e9..0000000000 --- a/thirdparty/libvpx/PATENTS +++ /dev/null @@ -1,23 +0,0 @@ -Additional IP Rights Grant (Patents) ------------------------------------- - -"These implementations" means the copyrightable works that implement the WebM -codecs distributed by Google as part of the WebM Project. - -Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, -royalty-free, irrevocable (except as stated in this section) patent license to -make, have made, use, offer to sell, sell, import, transfer, and otherwise -run, modify and propagate the contents of these implementations of WebM, where -such license applies only to those patent claims, both currently owned by -Google and acquired in the future, licensable by Google that are necessarily -infringed by these implementations of WebM. This grant does not include claims -that would be infringed only as a consequence of further modification of these -implementations. If you or your agent or exclusive licensee institute or order -or agree to the institution of patent litigation or any other patent -enforcement activity against any entity (including a cross-claim or -counterclaim in a lawsuit) alleging that any of these implementations of WebM -or any code incorporated within any of these implementations of WebM -constitute direct or contributory patent infringement, or inducement of -patent infringement, then any patent rights granted to you under this License -for these implementations of WebM shall terminate as of the date such -litigation is filed. diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h b/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h deleted file mode 100644 index 5c9b7aa392..0000000000 --- a/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h +++ /dev/null @@ -1,240 +0,0 @@ -#ifndef VP8_RTCD_H_ -#define VP8_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP8 - */ - -struct blockd; -struct loop_filter_info; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c - -void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_clear_system_state_c(); -#define vp8_clear_system_state vp8_clear_system_state_c - -void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); -void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride); -RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride); - -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); - -void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs); - -void vp8_dequantize_b_c(struct blockd*, short *dqc); -void vp8_dequantize_b_neon(struct blockd*, short *dqc); -RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc); - -void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bvs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_mbvs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); - -void vp8_short_inv_walsh4x4_c(short *input, short *output); -void vp8_short_inv_walsh4x4_neon(short *input, short *output); -RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output); - -void vp8_short_inv_walsh4x4_1_c(short *input, short *output); -#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c - -void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c - -void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; -#endif - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; -#endif - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon; -#endif - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon; -#endif - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon; -#endif - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon; -#endif - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon; -#endif - vp8_dequant_idct_add = vp8_dequant_idct_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon; -#endif - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; -#endif - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon; -#endif - vp8_dequantize_b = vp8_dequantize_b_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; -#endif - vp8_loop_filter_bh = vp8_loop_filter_bh_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; -#endif - vp8_loop_filter_bv = vp8_loop_filter_bv_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon; -#endif - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon; -#endif - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon; -#endif - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon; -#endif - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon; -#endif - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; -#endif - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; -#endif - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon; -#endif - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon; -#endif - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon; -#endif - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon; -#endif - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; -#endif -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_c.h b/thirdparty/libvpx/rtcd/vp8_rtcd_c.h deleted file mode 100644 index d1657ac09d..0000000000 --- a/thirdparty/libvpx/rtcd/vp8_rtcd_c.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef VP8_RTCD_H_ -#define VP8_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP8 - */ - -struct blockd; -struct loop_filter_info; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c - -void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c - -void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c - -void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c - -void vp8_clear_system_state_c(); -#define vp8_clear_system_state vp8_clear_system_state_c - -void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -#define vp8_copy_mem16x16 vp8_copy_mem16x16_c - -void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -#define vp8_copy_mem8x4 vp8_copy_mem8x4_c - -void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -#define vp8_copy_mem8x8 vp8_copy_mem8x8_c - -void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c - -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); -#define vp8_dequant_idct_add vp8_dequant_idct_add_c - -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c - -void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c - -void vp8_dequantize_b_c(struct blockd*, short *dqc); -#define vp8_dequantize_b vp8_dequantize_b_c - -void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_bh vp8_loop_filter_bh_c - -void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_bv vp8_loop_filter_bv_c - -void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c - -void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c - -void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c - -void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c - -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c - -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c - -void vp8_short_inv_walsh4x4_c(short *input, short *output); -#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c - -void vp8_short_inv_walsh4x4_1_c(short *input, short *output); -#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c - -void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c - -void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c - -void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c - -void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c - -void vp8_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h b/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h deleted file mode 100644 index cbe1f47b2a..0000000000 --- a/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h +++ /dev/null @@ -1,247 +0,0 @@ -#ifndef VP8_RTCD_H_ -#define VP8_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP8 - */ - -struct blockd; -struct loop_filter_info; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_clear_system_state_c(); -void vpx_reset_mmx_state(); -RTCD_EXTERN void (*vp8_clear_system_state)(); - -void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x4_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x8_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); -void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride); -RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride); - -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); - -void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs); - -void vp8_dequantize_b_c(struct blockd*, short *dqc); -void vp8_dequantize_b_mmx(struct blockd*, short *dqc); -RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc); - -void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bhs_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bvs_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_horizontal_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_vertical_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); - -void vp8_short_inv_walsh4x4_c(short *input, short *output); -void vp8_short_inv_walsh4x4_mmx(short *input, short *output); -void vp8_short_inv_walsh4x4_sse2(short *input, short *output); -RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output); - -void vp8_short_inv_walsh4x4_1_c(short *input, short *output); -#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c - -void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; - if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; - if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; - if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; - if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; - if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; - if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; - if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; - if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; - if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; - if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; - if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; - if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; - if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; - if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; - if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; - if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; - if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; - if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; - if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; - if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_loop_filter_bh = vp8_loop_filter_bh_c; - if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; - if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; - if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; - if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; - if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; - if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; - if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; - if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; - if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; - if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; - if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; - if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; - if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; - if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; - if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; - if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; - if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; - if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h b/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h deleted file mode 100644 index afdc7e179e..0000000000 --- a/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef VP9_RTCD_H_ -#define VP9_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP9 - */ - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon; -#endif - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon; -#endif -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_c.h b/thirdparty/libvpx/rtcd/vp9_rtcd_c.h deleted file mode 100644 index 329cb9d04c..0000000000 --- a/thirdparty/libvpx/rtcd/vp9_rtcd_c.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef VP9_RTCD_H_ -#define VP9_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP9 - */ - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c - -void vp9_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h b/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h deleted file mode 100644 index 8ce8067674..0000000000 --- a/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef VP9_RTCD_H_ -#define VP9_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP9 - */ - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; - if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; - if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; - if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h deleted file mode 100644 index df42f3d24a..0000000000 --- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h +++ /dev/null @@ -1,678 +0,0 @@ -#ifndef VPX_DSP_RTCD_H_ -#define VPX_DSP_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * DSP - */ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c - -void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c - -void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c - -void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c - -void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c - -void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c - -void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c - -void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c - -void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c - -void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c - -void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c - -void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c - -void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c - -void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c - -void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c - -void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c - -void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c - -void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c - -void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c - -void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c - -void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c - -void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c - -void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c - -void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c - -void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c - -void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c - -void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c - -void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_2d vpx_scaled_2d_c - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_horiz vpx_scaled_horiz_c - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_vert vpx_scaled_vert_c - -void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c - -void vpx_dsp_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - vpx_convolve8 = vpx_convolve8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8 = vpx_convolve8_neon; -#endif - vpx_convolve8_avg = vpx_convolve8_avg_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_avg = vpx_convolve8_avg_neon; -#endif - vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_neon; -#endif - vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_neon; -#endif - vpx_convolve8_horiz = vpx_convolve8_horiz_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_horiz = vpx_convolve8_horiz_neon; -#endif - vpx_convolve8_vert = vpx_convolve8_vert_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_vert = vpx_convolve8_vert_neon; -#endif - vpx_convolve_avg = vpx_convolve_avg_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve_avg = vpx_convolve_avg_neon; -#endif - vpx_convolve_copy = vpx_convolve_copy_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve_copy = vpx_convolve_copy_neon; -#endif - vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_neon; -#endif - vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_neon; -#endif - vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_neon; -#endif - vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_neon; -#endif - vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_neon; -#endif - vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_neon; -#endif - vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_neon; -#endif - vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_neon; -#endif - vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_neon; -#endif - vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_neon; -#endif - vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_neon; -#endif - vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_neon; -#endif - vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_neon; -#endif - vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_neon; -#endif - vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_neon; -#endif - vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_neon; -#endif - vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_neon; -#endif - vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_neon; -#endif - vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_neon; -#endif - vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_neon; -#endif - vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_neon; -#endif - vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_neon; -#endif - vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_neon; -#endif - vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_neon; -#endif - vpx_idct16x16_10_add = vpx_idct16x16_10_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct16x16_10_add = vpx_idct16x16_10_add_neon; -#endif - vpx_idct16x16_1_add = vpx_idct16x16_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct16x16_1_add = vpx_idct16x16_1_add_neon; -#endif - vpx_idct16x16_256_add = vpx_idct16x16_256_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct16x16_256_add = vpx_idct16x16_256_add_neon; -#endif - vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_neon; -#endif - vpx_idct32x32_135_add = vpx_idct32x32_135_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_neon; -#endif - vpx_idct32x32_1_add = vpx_idct32x32_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_1_add = vpx_idct32x32_1_add_neon; -#endif - vpx_idct32x32_34_add = vpx_idct32x32_34_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_34_add = vpx_idct32x32_1024_add_neon; -#endif - vpx_idct4x4_16_add = vpx_idct4x4_16_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct4x4_16_add = vpx_idct4x4_16_add_neon; -#endif - vpx_idct4x4_1_add = vpx_idct4x4_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct4x4_1_add = vpx_idct4x4_1_add_neon; -#endif - vpx_idct8x8_12_add = vpx_idct8x8_12_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct8x8_12_add = vpx_idct8x8_12_add_neon; -#endif - vpx_idct8x8_1_add = vpx_idct8x8_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct8x8_1_add = vpx_idct8x8_1_add_neon; -#endif - vpx_idct8x8_64_add = vpx_idct8x8_64_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct8x8_64_add = vpx_idct8x8_64_add_neon; -#endif - vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_neon; -#endif - vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_neon; -#endif - vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_neon; -#endif - vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_neon; -#endif - vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_neon; -#endif - vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_neon; -#endif - vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_neon; -#endif - vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_neon; -#endif - vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_neon; -#endif - vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_neon; -#endif - vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_neon; -#endif - vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_neon; -#endif - vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon; -#endif - vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_neon; -#endif - vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_neon; -#endif - vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_neon; -#endif - vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_neon; -#endif - vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_neon; -#endif - vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_neon; -#endif - vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_neon; -#endif -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h deleted file mode 100644 index 9fcc2f0066..0000000000 --- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h +++ /dev/null @@ -1,355 +0,0 @@ -#ifndef VPX_DSP_RTCD_H_ -#define VPX_DSP_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * DSP - */ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8 vpx_convolve8_c - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_avg vpx_convolve8_avg_c - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_horiz vpx_convolve8_horiz_c - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_vert vpx_convolve8_vert_c - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve_avg vpx_convolve_avg_c - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve_copy vpx_convolve_copy_c - -void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c - -void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c - -void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c - -void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c - -void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c - -void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c - -void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c - -void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c - -void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c - -void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c - -void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c - -void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c - -void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c - -void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c - -void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c - -void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c - -void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c - -void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c - -void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c - -void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c - -void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c - -void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c - -void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c - -void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c - -void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c - -void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c - -void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c - -void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c - -void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c - -void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c - -void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c - -void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c - -void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c - -void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c - -void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c - -void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c - -void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c - -void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c - -void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c - -void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c - -void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c - -void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c - -void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c - -void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c - -void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c - -void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c - -void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c - -void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c - -void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_34_add vpx_idct32x32_34_add_c - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c - -void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c - -void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c - -void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_horizontal_8_dual vpx_lpf_horizontal_8_dual_c - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_edge_16 vpx_lpf_horizontal_edge_16_c - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_edge_8 vpx_lpf_horizontal_edge_8_c - -void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_2d vpx_scaled_2d_c - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_horiz vpx_scaled_horiz_c - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_vert vpx_scaled_vert_c - -void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c - -void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c - -void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c - -void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c - -void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c - -void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c - -void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c - -void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c - -void vpx_dsp_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h deleted file mode 100644 index c2a68330ac..0000000000 --- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h +++ /dev/null @@ -1,604 +0,0 @@ -#ifndef VPX_DSP_RTCD_H_ -#define VPX_DSP_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * DSP - */ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c - -void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c - -void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c - -void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c - -void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c - -void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c - -void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c - -void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c - -void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c - -void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c - -void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c - -void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c - -void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c - -void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c - -void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c - -void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c - -void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c - -void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_iwht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c - -void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_horiz vpx_scaled_horiz_c - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_vert vpx_scaled_vert_c - -void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c - -void vpx_dsp_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - vpx_convolve8 = vpx_convolve8_c; - if (flags & HAS_SSE2) vpx_convolve8 = vpx_convolve8_sse2; - if (flags & HAS_SSSE3) vpx_convolve8 = vpx_convolve8_ssse3; - vpx_convolve8_avg = vpx_convolve8_avg_c; - if (flags & HAS_SSE2) vpx_convolve8_avg = vpx_convolve8_avg_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_avg = vpx_convolve8_avg_ssse3; - vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c; - if (flags & HAS_SSE2) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_ssse3; - vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c; - if (flags & HAS_SSE2) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_ssse3; - vpx_convolve8_horiz = vpx_convolve8_horiz_c; - if (flags & HAS_SSE2) vpx_convolve8_horiz = vpx_convolve8_horiz_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_horiz = vpx_convolve8_horiz_ssse3; - vpx_convolve8_vert = vpx_convolve8_vert_c; - if (flags & HAS_SSE2) vpx_convolve8_vert = vpx_convolve8_vert_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_vert = vpx_convolve8_vert_ssse3; - vpx_convolve_avg = vpx_convolve_avg_c; - if (flags & HAS_SSE2) vpx_convolve_avg = vpx_convolve_avg_sse2; - vpx_convolve_copy = vpx_convolve_copy_c; - if (flags & HAS_SSE2) vpx_convolve_copy = vpx_convolve_copy_sse2; - vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_ssse3; - vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_ssse3; - vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_ssse3; - vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_ssse3; - vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_ssse3; - vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_ssse3; - vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_sse2; - vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_c; - if (flags & HAS_SSSE3) vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_ssse3; - vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_ssse3; - vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_ssse3; - vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_sse2; - vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_sse2; - vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_ssse3; - vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_ssse3; - vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3; - vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3; - vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_sse2; - vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_sse2; - vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_sse2; - vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_sse2; - vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_sse2; - vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_sse2; - vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_sse2; - vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_sse2; - vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_sse2; - vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_sse2; - vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_sse2; - vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_sse2; - vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_sse2; - vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_sse2; - vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_sse2; - vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2; - vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_sse2; - vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_sse2; - vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_sse2; - vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_sse2; - vpx_idct16x16_10_add = vpx_idct16x16_10_add_c; - if (flags & HAS_SSE2) vpx_idct16x16_10_add = vpx_idct16x16_10_add_sse2; - vpx_idct16x16_1_add = vpx_idct16x16_1_add_c; - if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2; - vpx_idct16x16_256_add = vpx_idct16x16_256_add_c; - if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2; - vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2; - vpx_idct32x32_135_add = vpx_idct32x32_135_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_sse2; - vpx_idct32x32_1_add = vpx_idct32x32_1_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2; - vpx_idct32x32_34_add = vpx_idct32x32_34_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2; - vpx_idct4x4_16_add = vpx_idct4x4_16_add_c; - if (flags & HAS_SSE2) vpx_idct4x4_16_add = vpx_idct4x4_16_add_sse2; - vpx_idct4x4_1_add = vpx_idct4x4_1_add_c; - if (flags & HAS_SSE2) vpx_idct4x4_1_add = vpx_idct4x4_1_add_sse2; - vpx_idct8x8_12_add = vpx_idct8x8_12_add_c; - if (flags & HAS_SSE2) vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2; - vpx_idct8x8_1_add = vpx_idct8x8_1_add_c; - if (flags & HAS_SSE2) vpx_idct8x8_1_add = vpx_idct8x8_1_add_sse2; - vpx_idct8x8_64_add = vpx_idct8x8_64_add_c; - if (flags & HAS_SSE2) vpx_idct8x8_64_add = vpx_idct8x8_64_add_sse2; - vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_c; - if (flags & HAS_SSE2) vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_sse2; - vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_sse2; - vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_sse2; - vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_sse2; - vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_sse2; - vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_sse2; - vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_sse2; - vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_sse2; - vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_sse2; - vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_sse2; - vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_sse2; - vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_sse2; - vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_sse2; - vpx_scaled_2d = vpx_scaled_2d_c; - if (flags & HAS_SSSE3) vpx_scaled_2d = vpx_scaled_2d_ssse3; - vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_sse2; - vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_sse2; - vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_sse2; - vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_sse2; - vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_sse2; - vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_sse2; - vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_sse2; - vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_sse2; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/third_party/android/cpu-features.c b/thirdparty/libvpx/third_party/android/cpu-features.c deleted file mode 100644 index e2bd749b01..0000000000 --- a/thirdparty/libvpx/third_party/android/cpu-features.c +++ /dev/null @@ -1,1313 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* ChangeLog for this library: - * - * NDK r10e?: Add MIPS MSA feature. - * - * NDK r10: Support for 64-bit CPUs (Intel, ARM & MIPS). - * - * NDK r8d: Add android_setCpu(). - * - * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, - * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. - * - * Rewrite the code to parse /proc/self/auxv instead of - * the "Features" field in /proc/cpuinfo. - * - * Dynamically allocate the buffer that hold the content - * of /proc/cpuinfo to deal with newer hardware. - * - * NDK r7c: Fix CPU count computation. The old method only reported the - * number of _active_ CPUs when the library was initialized, - * which could be less than the real total. - * - * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 - * for an ARMv6 CPU (see below). - * - * Handle kernels that only report 'neon', and not 'vfpv3' - * (VFPv3 is mandated by the ARM architecture is Neon is implemented) - * - * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' - * - * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in - * android_getCpuFamily(). - * - * NDK r4: Initial release - */ - -#include "cpu-features.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static pthread_once_t g_once; -static int g_inited; -static AndroidCpuFamily g_cpuFamily; -static uint64_t g_cpuFeatures; -static int g_cpuCount; - -#ifdef __arm__ -static uint32_t g_cpuIdArm; -#endif - -static const int android_cpufeatures_debug = 0; - -#define D(...) \ - do { \ - if (android_cpufeatures_debug) { \ - printf(__VA_ARGS__); fflush(stdout); \ - } \ - } while (0) - -#ifdef __i386__ -static __inline__ void x86_cpuid(int func, int values[4]) -{ - int a, b, c, d; - /* We need to preserve ebx since we're compiling PIC code */ - /* this means we can't use "=b" for the second output register */ - __asm__ __volatile__ ( \ - "push %%ebx\n" - "cpuid\n" \ - "mov %%ebx, %1\n" - "pop %%ebx\n" - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "a" (func) \ - ); - values[0] = a; - values[1] = b; - values[2] = c; - values[3] = d; -} -#elif defined(__x86_64__) -static __inline__ void x86_cpuid(int func, int values[4]) -{ - int64_t a, b, c, d; - /* We need to preserve ebx since we're compiling PIC code */ - /* this means we can't use "=b" for the second output register */ - __asm__ __volatile__ ( \ - "push %%rbx\n" - "cpuid\n" \ - "mov %%rbx, %1\n" - "pop %%rbx\n" - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "a" (func) \ - ); - values[0] = a; - values[1] = b; - values[2] = c; - values[3] = d; -} -#endif - -/* Get the size of a file by reading it until the end. This is needed - * because files under /proc do not always return a valid size when - * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. - */ -static int -get_file_size(const char* pathname) -{ - - int fd, result = 0; - char buffer[256]; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - D("Can't open %s: %s\n", pathname, strerror(errno)); - return -1; - } - - for (;;) { - int ret = read(fd, buffer, sizeof buffer); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading %s: %s\n", pathname, strerror(errno)); - break; - } - if (ret == 0) - break; - - result += ret; - } - close(fd); - return result; -} - -/* Read the content of /proc/cpuinfo into a user-provided buffer. - * Return the length of the data, or -1 on error. Does *not* - * zero-terminate the content. Will not read more - * than 'buffsize' bytes. - */ -static int -read_file(const char* pathname, char* buffer, size_t buffsize) -{ - int fd, count; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - D("Could not open %s: %s\n", pathname, strerror(errno)); - return -1; - } - count = 0; - while (count < (int)buffsize) { - int ret = read(fd, buffer + count, buffsize - count); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading from %s: %s\n", pathname, strerror(errno)); - if (count == 0) - count = -1; - break; - } - if (ret == 0) - break; - count += ret; - } - close(fd); - return count; -} - -#ifdef __arm__ -/* Extract the content of a the first occurence of a given field in - * the content of /proc/cpuinfo and return it as a heap-allocated - * string that must be freed by the caller. - * - * Return NULL if not found - */ -static char* -extract_cpuinfo_field(const char* buffer, int buflen, const char* field) -{ - int fieldlen = strlen(field); - const char* bufend = buffer + buflen; - char* result = NULL; - int len; - const char *p, *q; - - /* Look for first field occurence, and ensures it starts the line. */ - p = buffer; - for (;;) { - p = memmem(p, bufend-p, field, fieldlen); - if (p == NULL) - goto EXIT; - - if (p == buffer || p[-1] == '\n') - break; - - p += fieldlen; - } - - /* Skip to the first column followed by a space */ - p += fieldlen; - p = memchr(p, ':', bufend-p); - if (p == NULL || p[1] != ' ') - goto EXIT; - - /* Find the end of the line */ - p += 2; - q = memchr(p, '\n', bufend-p); - if (q == NULL) - q = bufend; - - /* Copy the line into a heap-allocated buffer */ - len = q-p; - result = malloc(len+1); - if (result == NULL) - goto EXIT; - - memcpy(result, p, len); - result[len] = '\0'; - -EXIT: - return result; -} - -/* Checks that a space-separated list of items contains one given 'item'. - * Returns 1 if found, 0 otherwise. - */ -static int -has_list_item(const char* list, const char* item) -{ - const char* p = list; - int itemlen = strlen(item); - - if (list == NULL) - return 0; - - while (*p) { - const char* q; - - /* skip spaces */ - while (*p == ' ' || *p == '\t') - p++; - - /* find end of current list item */ - q = p; - while (*q && *q != ' ' && *q != '\t') - q++; - - if (itemlen == q-p && !memcmp(p, item, itemlen)) - return 1; - - /* skip to next item */ - p = q; - } - return 0; -} -#endif /* __arm__ */ - -/* Parse a number starting from 'input', but not going further - * than 'limit'. Return the value into '*result'. - * - * NOTE: Does not skip over leading spaces, or deal with sign characters. - * NOTE: Ignores overflows. - * - * The function returns NULL in case of error (bad format), or the new - * position after the decimal number in case of success (which will always - * be <= 'limit'). - */ -static const char* -parse_number(const char* input, const char* limit, int base, int* result) -{ - const char* p = input; - int val = 0; - while (p < limit) { - int d = (*p - '0'); - if ((unsigned)d >= 10U) { - d = (*p - 'a'); - if ((unsigned)d >= 6U) - d = (*p - 'A'); - if ((unsigned)d >= 6U) - break; - d += 10; - } - if (d >= base) - break; - val = val*base + d; - p++; - } - if (p == input) - return NULL; - - *result = val; - return p; -} - -static const char* -parse_decimal(const char* input, const char* limit, int* result) -{ - return parse_number(input, limit, 10, result); -} - -#ifdef __arm__ -static const char* -parse_hexadecimal(const char* input, const char* limit, int* result) -{ - return parse_number(input, limit, 16, result); -} -#endif /* __arm__ */ - -/* This small data type is used to represent a CPU list / mask, as read - * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt - * - * For now, we don't expect more than 32 cores on mobile devices, so keep - * everything simple. - */ -typedef struct { - uint32_t mask; -} CpuList; - -static __inline__ void -cpulist_init(CpuList* list) { - list->mask = 0; -} - -static __inline__ void -cpulist_and(CpuList* list1, CpuList* list2) { - list1->mask &= list2->mask; -} - -static __inline__ void -cpulist_set(CpuList* list, int index) { - if ((unsigned)index < 32) { - list->mask |= (uint32_t)(1U << index); - } -} - -static __inline__ int -cpulist_count(CpuList* list) { - return __builtin_popcount(list->mask); -} - -/* Parse a textual list of cpus and store the result inside a CpuList object. - * Input format is the following: - * - comma-separated list of items (no spaces) - * - each item is either a single decimal number (cpu index), or a range made - * of two numbers separated by a single dash (-). Ranges are inclusive. - * - * Examples: 0 - * 2,4-127,128-143 - * 0-1 - */ -static void -cpulist_parse(CpuList* list, const char* line, int line_len) -{ - const char* p = line; - const char* end = p + line_len; - const char* q; - - /* NOTE: the input line coming from sysfs typically contains a - * trailing newline, so take care of it in the code below - */ - while (p < end && *p != '\n') - { - int val, start_value, end_value; - - /* Find the end of current item, and put it into 'q' */ - q = memchr(p, ',', end-p); - if (q == NULL) { - q = end; - } - - /* Get first value */ - p = parse_decimal(p, q, &start_value); - if (p == NULL) - goto BAD_FORMAT; - - end_value = start_value; - - /* If we're not at the end of the item, expect a dash and - * and integer; extract end value. - */ - if (p < q && *p == '-') { - p = parse_decimal(p+1, q, &end_value); - if (p == NULL) - goto BAD_FORMAT; - } - - /* Set bits CPU list bits */ - for (val = start_value; val <= end_value; val++) { - cpulist_set(list, val); - } - - /* Jump to next item */ - p = q; - if (p < end) - p++; - } - -BAD_FORMAT: - ; -} - -/* Read a CPU list from one sysfs file */ -static void -cpulist_read_from(CpuList* list, const char* filename) -{ - char file[64]; - int filelen; - - cpulist_init(list); - - filelen = read_file(filename, file, sizeof file); - if (filelen < 0) { - D("Could not read %s: %s\n", filename, strerror(errno)); - return; - } - - cpulist_parse(list, file, filelen); -} -#if defined(__aarch64__) -// see kernel header -#define HWCAP_FP (1 << 0) -#define HWCAP_ASIMD (1 << 1) -#define HWCAP_AES (1 << 3) -#define HWCAP_PMULL (1 << 4) -#define HWCAP_SHA1 (1 << 5) -#define HWCAP_SHA2 (1 << 6) -#define HWCAP_CRC32 (1 << 7) -#endif - -#if defined(__arm__) - -// See kernel header. -#define HWCAP_VFP (1 << 6) -#define HWCAP_IWMMXT (1 << 9) -#define HWCAP_NEON (1 << 12) -#define HWCAP_VFPv3 (1 << 13) -#define HWCAP_VFPv3D16 (1 << 14) -#define HWCAP_VFPv4 (1 << 16) -#define HWCAP_IDIVA (1 << 17) -#define HWCAP_IDIVT (1 << 18) - -// see kernel header -#define HWCAP2_AES (1 << 0) -#define HWCAP2_PMULL (1 << 1) -#define HWCAP2_SHA1 (1 << 2) -#define HWCAP2_SHA2 (1 << 3) -#define HWCAP2_CRC32 (1 << 4) - -// This is the list of 32-bit ARMv7 optional features that are _always_ -// supported by ARMv8 CPUs, as mandated by the ARM Architecture Reference -// Manual. -#define HWCAP_SET_FOR_ARMV8 \ - ( HWCAP_VFP | \ - HWCAP_NEON | \ - HWCAP_VFPv3 | \ - HWCAP_VFPv4 | \ - HWCAP_IDIVA | \ - HWCAP_IDIVT ) -#endif - -#if defined(__mips__) -// see kernel header -#define HWCAP_MIPS_R6 (1 << 0) -#define HWCAP_MIPS_MSA (1 << 1) -#endif - -#if defined(__arm__) || defined(__aarch64__) || defined(__mips__) - -#define AT_HWCAP 16 -#define AT_HWCAP2 26 - -// Probe the system's C library for a 'getauxval' function and call it if -// it exits, or return 0 for failure. This function is available since API -// level 20. -// -// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the -// edge case where some NDK developers use headers for a platform that is -// newer than the one really targetted by their application. -// This is typically done to use newer native APIs only when running on more -// recent Android versions, and requires careful symbol management. -// -// Note that getauxval() can't really be re-implemented here, because -// its implementation does not parse /proc/self/auxv. Instead it depends -// on values that are passed by the kernel at process-init time to the -// C runtime initialization layer. -static uint32_t -get_elf_hwcap_from_getauxval(int hwcap_type) { - typedef unsigned long getauxval_func_t(unsigned long); - - dlerror(); - void* libc_handle = dlopen("libc.so", RTLD_NOW); - if (!libc_handle) { - D("Could not dlopen() C library: %s\n", dlerror()); - return 0; - } - - uint32_t ret = 0; - getauxval_func_t* func = (getauxval_func_t*) - dlsym(libc_handle, "getauxval"); - if (!func) { - D("Could not find getauxval() in C library\n"); - } else { - // Note: getauxval() returns 0 on failure. Doesn't touch errno. - ret = (uint32_t)(*func)(hwcap_type); - } - dlclose(libc_handle); - return ret; -} -#endif - -#if defined(__arm__) -// Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the -// current CPU. Note that this file is not accessible from regular -// application processes on some Android platform releases. -// On success, return new ELF hwcaps, or 0 on failure. -static uint32_t -get_elf_hwcap_from_proc_self_auxv(void) { - const char filepath[] = "/proc/self/auxv"; - int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY)); - if (fd < 0) { - D("Could not open %s: %s\n", filepath, strerror(errno)); - return 0; - } - - struct { uint32_t tag; uint32_t value; } entry; - - uint32_t result = 0; - for (;;) { - int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry)); - if (ret < 0) { - D("Error while reading %s: %s\n", filepath, strerror(errno)); - break; - } - // Detect end of list. - if (ret == 0 || (entry.tag == 0 && entry.value == 0)) - break; - if (entry.tag == AT_HWCAP) { - result = entry.value; - break; - } - } - close(fd); - return result; -} - -/* Compute the ELF HWCAP flags from the content of /proc/cpuinfo. - * This works by parsing the 'Features' line, which lists which optional - * features the device's CPU supports, on top of its reference - * architecture. - */ -static uint32_t -get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len) { - uint32_t hwcaps = 0; - long architecture = 0; - char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); - if (cpuArch) { - architecture = strtol(cpuArch, NULL, 10); - free(cpuArch); - - if (architecture >= 8L) { - // This is a 32-bit ARM binary running on a 64-bit ARM64 kernel. - // The 'Features' line only lists the optional features that the - // device's CPU supports, compared to its reference architecture - // which are of no use for this process. - D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", architecture); - return HWCAP_SET_FOR_ARMV8; - } - } - - char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); - if (cpuFeatures != NULL) { - D("Found cpuFeatures = '%s'\n", cpuFeatures); - - if (has_list_item(cpuFeatures, "vfp")) - hwcaps |= HWCAP_VFP; - if (has_list_item(cpuFeatures, "vfpv3")) - hwcaps |= HWCAP_VFPv3; - if (has_list_item(cpuFeatures, "vfpv3d16")) - hwcaps |= HWCAP_VFPv3D16; - if (has_list_item(cpuFeatures, "vfpv4")) - hwcaps |= HWCAP_VFPv4; - if (has_list_item(cpuFeatures, "neon")) - hwcaps |= HWCAP_NEON; - if (has_list_item(cpuFeatures, "idiva")) - hwcaps |= HWCAP_IDIVA; - if (has_list_item(cpuFeatures, "idivt")) - hwcaps |= HWCAP_IDIVT; - if (has_list_item(cpuFeatures, "idiv")) - hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; - if (has_list_item(cpuFeatures, "iwmmxt")) - hwcaps |= HWCAP_IWMMXT; - - free(cpuFeatures); - } - return hwcaps; -} -#endif /* __arm__ */ - -/* Return the number of cpus present on a given device. - * - * To handle all weird kernel configurations, we need to compute the - * intersection of the 'present' and 'possible' CPU lists and count - * the result. - */ -static int -get_cpu_count(void) -{ - CpuList cpus_present[1]; - CpuList cpus_possible[1]; - - cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); - cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); - - /* Compute the intersection of both sets to get the actual number of - * CPU cores that can be used on this device by the kernel. - */ - cpulist_and(cpus_present, cpus_possible); - - return cpulist_count(cpus_present); -} - -static void -android_cpuInitFamily(void) -{ -#if defined(__arm__) - g_cpuFamily = ANDROID_CPU_FAMILY_ARM; -#elif defined(__i386__) - g_cpuFamily = ANDROID_CPU_FAMILY_X86; -#elif defined(__mips64) -/* Needs to be before __mips__ since the compiler defines both */ - g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64; -#elif defined(__mips__) - g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; -#elif defined(__aarch64__) - g_cpuFamily = ANDROID_CPU_FAMILY_ARM64; -#elif defined(__x86_64__) - g_cpuFamily = ANDROID_CPU_FAMILY_X86_64; -#else - g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; -#endif -} - -static void -android_cpuInit(void) -{ - char* cpuinfo = NULL; - int cpuinfo_len; - - android_cpuInitFamily(); - - g_cpuFeatures = 0; - g_cpuCount = 1; - g_inited = 1; - - cpuinfo_len = get_file_size("/proc/cpuinfo"); - if (cpuinfo_len < 0) { - D("cpuinfo_len cannot be computed!"); - return; - } - cpuinfo = malloc(cpuinfo_len); - if (cpuinfo == NULL) { - D("cpuinfo buffer could not be allocated"); - return; - } - cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); - D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, - cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo); - - if (cpuinfo_len < 0) /* should not happen */ { - free(cpuinfo); - return; - } - - /* Count the CPU cores, the value may be 0 for single-core CPUs */ - g_cpuCount = get_cpu_count(); - if (g_cpuCount == 0) { - g_cpuCount = 1; - } - - D("found cpuCount = %d\n", g_cpuCount); - -#ifdef __arm__ - { - /* Extract architecture from the "CPU Architecture" field. - * The list is well-known, unlike the the output of - * the 'Processor' field which can vary greatly. - * - * See the definition of the 'proc_arch' array in - * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in - * same file. - */ - char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); - - if (cpuArch != NULL) { - char* end; - long archNumber; - int hasARMv7 = 0; - - D("found cpuArch = '%s'\n", cpuArch); - - /* read the initial decimal number, ignore the rest */ - archNumber = strtol(cpuArch, &end, 10); - - /* Note that ARMv8 is upwards compatible with ARMv7. */ - if (end > cpuArch && archNumber >= 7) { - hasARMv7 = 1; - } - - /* Unfortunately, it seems that certain ARMv6-based CPUs - * report an incorrect architecture number of 7! - * - * See http://code.google.com/p/android/issues/detail?id=10812 - * - * We try to correct this by looking at the 'elf_format' - * field reported by the 'Processor' field, which is of the - * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for - * an ARMv6-one. - */ - if (hasARMv7) { - char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, - "Processor"); - if (cpuProc != NULL) { - D("found cpuProc = '%s'\n", cpuProc); - if (has_list_item(cpuProc, "(v6l)")) { - D("CPU processor and architecture mismatch!!\n"); - hasARMv7 = 0; - } - free(cpuProc); - } - } - - if (hasARMv7) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; - } - - /* The LDREX / STREX instructions are available from ARMv6 */ - if (archNumber >= 6) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; - } - - free(cpuArch); - } - - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = 0; - hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP); - if (!hwcaps) { - D("Parsing /proc/self/auxv to extract ELF hwcaps!\n"); - hwcaps = get_elf_hwcap_from_proc_self_auxv(); - } - if (!hwcaps) { - // Parsing /proc/self/auxv will fail from regular application - // processes on some Android platform versions, when this happens - // parse proc/cpuinfo instead. - D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n"); - hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len); - } - - if (hwcaps != 0) { - int has_vfp = (hwcaps & HWCAP_VFP); - int has_vfpv3 = (hwcaps & HWCAP_VFPv3); - int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); - int has_vfpv4 = (hwcaps & HWCAP_VFPv4); - int has_neon = (hwcaps & HWCAP_NEON); - int has_idiva = (hwcaps & HWCAP_IDIVA); - int has_idivt = (hwcaps & HWCAP_IDIVT); - int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); - - // The kernel does a poor job at ensuring consistency when - // describing CPU features. So lots of guessing is needed. - - // 'vfpv4' implies VFPv3|VFP_FMA|FP16 - if (has_vfpv4) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | - ANDROID_CPU_ARM_FEATURE_VFP_FP16 | - ANDROID_CPU_ARM_FEATURE_VFP_FMA; - - // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, - // a value of 'vfpv3' doesn't necessarily mean that the D32 - // feature is present, so be conservative. All CPUs in the - // field that support D32 also support NEON, so this should - // not be a problem in practice. - if (has_vfpv3 || has_vfpv3d16) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; - - // 'vfp' is super ambiguous. Depending on the kernel, it can - // either mean VFPv2 or VFPv3. Make it depend on ARMv7. - if (has_vfp) { - if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; - else - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; - } - - // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA - if (has_neon) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | - ANDROID_CPU_ARM_FEATURE_NEON | - ANDROID_CPU_ARM_FEATURE_VFP_D32; - if (has_vfpv4) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; - } - - // VFPv3 implies VFPv2 and ARMv7 - if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | - ANDROID_CPU_ARM_FEATURE_ARMv7; - - if (has_idiva) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; - if (has_idivt) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; - - if (has_iwmmxt) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; - } - - /* Extract the list of CPU features from ELF hwcaps2 */ - uint32_t hwcaps2 = 0; - hwcaps2 = get_elf_hwcap_from_getauxval(AT_HWCAP2); - if (hwcaps2 != 0) { - int has_aes = (hwcaps2 & HWCAP2_AES); - int has_pmull = (hwcaps2 & HWCAP2_PMULL); - int has_sha1 = (hwcaps2 & HWCAP2_SHA1); - int has_sha2 = (hwcaps2 & HWCAP2_SHA2); - int has_crc32 = (hwcaps2 & HWCAP2_CRC32); - - if (has_aes) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_AES; - if (has_pmull) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL; - if (has_sha1) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1; - if (has_sha2) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2; - if (has_crc32) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32; - } - /* Extract the cpuid value from various fields */ - // The CPUID value is broken up in several entries in /proc/cpuinfo. - // This table is used to rebuild it from the entries. - static const struct CpuIdEntry { - const char* field; - char format; - char bit_lshift; - char bit_length; - } cpu_id_entries[] = { - { "CPU implementer", 'x', 24, 8 }, - { "CPU variant", 'x', 20, 4 }, - { "CPU part", 'x', 4, 12 }, - { "CPU revision", 'd', 0, 4 }, - }; - size_t i; - D("Parsing /proc/cpuinfo to recover CPUID\n"); - for (i = 0; - i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); - ++i) { - const struct CpuIdEntry* entry = &cpu_id_entries[i]; - char* value = extract_cpuinfo_field(cpuinfo, - cpuinfo_len, - entry->field); - if (value == NULL) - continue; - - D("field=%s value='%s'\n", entry->field, value); - char* value_end = value + strlen(value); - int val = 0; - const char* start = value; - const char* p; - if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { - start += 2; - p = parse_hexadecimal(start, value_end, &val); - } else if (entry->format == 'x') - p = parse_hexadecimal(value, value_end, &val); - else - p = parse_decimal(value, value_end, &val); - - if (p > (const char*)start) { - val &= ((1 << entry->bit_length)-1); - val <<= entry->bit_lshift; - g_cpuIdArm |= (uint32_t) val; - } - - free(value); - } - - // Handle kernel configuration bugs that prevent the correct - // reporting of CPU features. - static const struct CpuFix { - uint32_t cpuid; - uint64_t or_flags; - } cpu_fixes[] = { - /* The Nexus 4 (Qualcomm Krait) kernel configuration - * forgets to report IDIV support. */ - { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, - { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, - }; - size_t n; - for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) { - const struct CpuFix* entry = &cpu_fixes[n]; - - if (g_cpuIdArm == entry->cpuid) - g_cpuFeatures |= entry->or_flags; - } - - // Special case: The emulator-specific Android 4.2 kernel fails - // to report support for the 32-bit ARM IDIV instruction. - // Technically, this is a feature of the virtual CPU implemented - // by the emulator. Note that it could also support Thumb IDIV - // in the future, and this will have to be slightly updated. - char* hardware = extract_cpuinfo_field(cpuinfo, - cpuinfo_len, - "Hardware"); - if (hardware) { - if (!strcmp(hardware, "Goldfish") && - g_cpuIdArm == 0x4100c080 && - (g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; - } - free(hardware); - } - } -#endif /* __arm__ */ -#ifdef __aarch64__ - { - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = 0; - hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP); - if (hwcaps != 0) { - int has_fp = (hwcaps & HWCAP_FP); - int has_asimd = (hwcaps & HWCAP_ASIMD); - int has_aes = (hwcaps & HWCAP_AES); - int has_pmull = (hwcaps & HWCAP_PMULL); - int has_sha1 = (hwcaps & HWCAP_SHA1); - int has_sha2 = (hwcaps & HWCAP_SHA2); - int has_crc32 = (hwcaps & HWCAP_CRC32); - - if(has_fp == 0) { - D("ERROR: Floating-point unit missing, but is required by Android on AArch64 CPUs\n"); - } - if(has_asimd == 0) { - D("ERROR: ASIMD unit missing, but is required by Android on AArch64 CPUs\n"); - } - - if (has_fp) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP; - if (has_asimd) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD; - if (has_aes) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES; - if (has_pmull) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_PMULL; - if (has_sha1) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1; - if (has_sha2) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2; - if (has_crc32) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32; - } - } -#endif /* __aarch64__ */ - -#if defined(__i386__) || defined(__x86_64__) - int regs[4]; - -/* According to http://en.wikipedia.org/wiki/CPUID */ -#define VENDOR_INTEL_b 0x756e6547 -#define VENDOR_INTEL_c 0x6c65746e -#define VENDOR_INTEL_d 0x49656e69 - - x86_cpuid(0, regs); - int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && - regs[2] == VENDOR_INTEL_c && - regs[3] == VENDOR_INTEL_d); - - x86_cpuid(1, regs); - if ((regs[2] & (1 << 9)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; - } - if ((regs[2] & (1 << 23)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; - } - if ((regs[2] & (1 << 19)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1; - } - if ((regs[2] & (1 << 20)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2; - } - if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; - } - if ((regs[2] & (1 << 25)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI; - } - if ((regs[2] & (1 << 28)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX; - } - if ((regs[2] & (1 << 30)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND; - } - - x86_cpuid(7, regs); - if ((regs[1] & (1 << 5)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2; - } - if ((regs[1] & (1 << 29)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI; - } - - -#endif -#if defined( __mips__) - { /* MIPS and MIPS64 */ - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = 0; - hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP); - if (hwcaps != 0) { - int has_r6 = (hwcaps & HWCAP_MIPS_R6); - int has_msa = (hwcaps & HWCAP_MIPS_MSA); - if (has_r6) - g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6; - if (has_msa) - g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA; - } - } -#endif /* __mips__ */ - - free(cpuinfo); -} - - -AndroidCpuFamily -android_getCpuFamily(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuFamily; -} - - -uint64_t -android_getCpuFeatures(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuFeatures; -} - - -int -android_getCpuCount(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuCount; -} - -static void -android_cpuInitDummy(void) -{ - g_inited = 1; -} - -int -android_setCpu(int cpu_count, uint64_t cpu_features) -{ - /* Fail if the library was already initialized. */ - if (g_inited) - return 0; - - android_cpuInitFamily(); - g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count); - g_cpuFeatures = cpu_features; - pthread_once(&g_once, android_cpuInitDummy); - - return 1; -} - -#ifdef __arm__ -uint32_t -android_getCpuIdArm(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuIdArm; -} - -int -android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) -{ - if (!android_setCpu(cpu_count, cpu_features)) - return 0; - - g_cpuIdArm = cpu_id; - return 1; -} -#endif /* __arm__ */ - -/* - * Technical note: Making sense of ARM's FPU architecture versions. - * - * FPA was ARM's first attempt at an FPU architecture. There is no Android - * device that actually uses it since this technology was already obsolete - * when the project started. If you see references to FPA instructions - * somewhere, you can be sure that this doesn't apply to Android at all. - * - * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of - * new versions / additions to it. ARM considers this obsolete right now, - * and no known Android device implements it either. - * - * VFPv2 added a few instructions to VFPv1, and is an *optional* extension - * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device - * supporting the 'armeabi' ABI doesn't necessarily support these. - * - * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used - * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated - * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means - * that it provides 16 double-precision FPU registers (d0-d15) and 32 - * single-precision ones (s0-s31) which happen to be mapped to the same - * register banks. - * - * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 - * additional double precision registers (d16-d31). Note that there are - * still only 32 single precision registers. - * - * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision - * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which - * are not supported by Android. Note that it is not compatible with VFPv2. - * - * NOTE: The term 'VFPv3' usually designate either VFPv3-D16 or VFPv3-D32 - * depending on context. For example GCC uses it for VFPv3-D32, but - * the Linux kernel code uses it for VFPv3-D16 (especially in - * /proc/cpuinfo). Always try to use the full designation when - * possible. - * - * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides - * instructions to perform parallel computations on vectors of 8, 16, - * 32, 64 and 128 bit quantities. NEON requires VFPv32-D32 since all - * NEON registers are also mapped to the same register banks. - * - * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to - * perform fused multiply-accumulate on VFP registers, as well as - * half-precision (16-bit) conversion operations. - * - * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision - * registers. - * - * VPFv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused - * multiply-accumulate instructions that work on the NEON registers. - * - * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32 - * depending on context. - * - * The following information was determined by scanning the binutils-2.22 - * sources: - * - * Basic VFP instruction subsets: - * - * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set. - * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns. - * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1. - * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision. - * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision. - * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns. - * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31. - * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions. - * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add - * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add - * - * FPU types (excluding NEON) - * - * FPU_VFP_V1xD (EXT_V1xD) - * | - * +--------------------------+ - * | | - * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD) - * | | - * | | - * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA) - * | - * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3) - * | - * +--------------------------+ - * | | - * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA) - * | | - * | FPU_VFP_V4 (+EXT_D32) - * | - * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA) - * - * VFP architectures: - * - * ARCH_VFP_V1xD (EXT_V1xD) - * | - * +------------------+ - * | | - * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD) - * | | - * | ARCH_VFP_V3xD_FP16 (+EXT_FP16) - * | | - * | ARCH_VFP_V4_SP_D16 (+EXT_FMA) - * | - * ARCH_VFP_V1 (+EXT_V1) - * | - * ARCH_VFP_V2 (+EXT_V2) - * | - * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) - * | | - * | ARCH_VFP_V4 (+EXT_D32) - * | | - * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) - * | - * ARCH_VFP_V3 (+EXT_D32) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3_FP16 (+EXT_FP16) - * | - * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) - * | - * ARCH_NEON_FP16 (+EXT_FP16) - * - * -fpu= values and their correspondance with FPU architectures above: - * - * {"vfp", FPU_ARCH_VFP_V2}, - * {"vfp9", FPU_ARCH_VFP_V2}, - * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatbility. - * {"vfp10", FPU_ARCH_VFP_V2}, - * {"vfp10-r0", FPU_ARCH_VFP_V1}, - * {"vfpxd", FPU_ARCH_VFP_V1xD}, - * {"vfpv2", FPU_ARCH_VFP_V2}, - * {"vfpv3", FPU_ARCH_VFP_V3}, - * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16}, - * {"vfpv3-d16", FPU_ARCH_VFP_V3D16}, - * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16}, - * {"vfpv3xd", FPU_ARCH_VFP_V3xD}, - * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16}, - * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, - * {"neon-fp16", FPU_ARCH_NEON_FP16}, - * {"vfpv4", FPU_ARCH_VFP_V4}, - * {"vfpv4-d16", FPU_ARCH_VFP_V4D16}, - * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16}, - * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4}, - * - * - * Simplified diagram that only includes FPUs supported by Android: - * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI, - * all others are optional and must be probed at runtime. - * - * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) - * | | - * | ARCH_VFP_V4 (+EXT_D32) - * | | - * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) - * | - * ARCH_VFP_V3 (+EXT_D32) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3_FP16 (+EXT_FP16) - * | - * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) - * | - * ARCH_NEON_FP16 (+EXT_FP16) - * - */ diff --git a/thirdparty/libvpx/third_party/android/cpu-features.h b/thirdparty/libvpx/third_party/android/cpu-features.h deleted file mode 100644 index 1e9724197a..0000000000 --- a/thirdparty/libvpx/third_party/android/cpu-features.h +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#ifndef CPU_FEATURES_H -#define CPU_FEATURES_H - -#include -#include - -__BEGIN_DECLS - -/* A list of valid values returned by android_getCpuFamily(). - * They describe the CPU Architecture of the current process. - */ -typedef enum { - ANDROID_CPU_FAMILY_UNKNOWN = 0, - ANDROID_CPU_FAMILY_ARM, - ANDROID_CPU_FAMILY_X86, - ANDROID_CPU_FAMILY_MIPS, - ANDROID_CPU_FAMILY_ARM64, - ANDROID_CPU_FAMILY_X86_64, - ANDROID_CPU_FAMILY_MIPS64, - - ANDROID_CPU_FAMILY_MAX /* do not remove */ - -} AndroidCpuFamily; - -/* Return the CPU family of the current process. - * - * Note that this matches the bitness of the current process. I.e. when - * running a 32-bit binary on a 64-bit capable CPU, this will return the - * 32-bit CPU family value. - */ -extern AndroidCpuFamily android_getCpuFamily(void); - -/* Return a bitmap describing a set of optional CPU features that are - * supported by the current device's CPU. The exact bit-flags returned - * depend on the value returned by android_getCpuFamily(). See the - * documentation for the ANDROID_CPU_*_FEATURE_* flags below for details. - */ -extern uint64_t android_getCpuFeatures(void); - -/* The list of feature flags for ANDROID_CPU_FAMILY_ARM that can be - * recognized by the library (see note below for 64-bit ARM). Value details - * are: - * - * VFPv2: - * CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs - * support these instructions. VFPv2 is a subset of VFPv3 so this will - * be set whenever VFPv3 is set too. - * - * ARMv7: - * CPU supports the ARMv7-A basic instruction set. - * This feature is mandated by the 'armeabi-v7a' ABI. - * - * VFPv3: - * CPU supports the VFPv3-D16 instruction set, providing hardware FPU - * support for single and double precision floating point registers. - * Note that only 16 FPU registers are available by default, unless - * the D32 bit is set too. This feature is also mandated by the - * 'armeabi-v7a' ABI. - * - * VFP_D32: - * CPU VFP optional extension that provides 32 FPU registers, - * instead of 16. Note that ARM mandates this feature is the 'NEON' - * feature is implemented by the CPU. - * - * NEON: - * CPU FPU supports "ARM Advanced SIMD" instructions, also known as - * NEON. Note that this mandates the VFP_D32 feature as well, per the - * ARM Architecture specification. - * - * VFP_FP16: - * Half-width floating precision VFP extension. If set, the CPU - * supports instructions to perform floating-point operations on - * 16-bit registers. This is part of the VFPv4 specification, but - * not mandated by any Android ABI. - * - * VFP_FMA: - * Fused multiply-accumulate VFP instructions extension. Also part of - * the VFPv4 specification, but not mandated by any Android ABI. - * - * NEON_FMA: - * Fused multiply-accumulate NEON instructions extension. Optional - * extension from the VFPv4 specification, but not mandated by any - * Android ABI. - * - * IDIV_ARM: - * Integer division available in ARM mode. Only available - * on recent CPUs (e.g. Cortex-A15). - * - * IDIV_THUMB2: - * Integer division available in Thumb-2 mode. Only available - * on recent CPUs (e.g. Cortex-A15). - * - * iWMMXt: - * Optional extension that adds MMX registers and operations to an - * ARM CPU. This is only available on a few XScale-based CPU designs - * sold by Marvell. Pretty rare in practice. - * - * AES: - * CPU supports AES instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * CRC32: - * CPU supports CRC32 instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * SHA2: - * CPU supports SHA2 instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * SHA1: - * CPU supports SHA1 instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * PMULL: - * CPU supports 64-bit PMULL and PMULL2 instructions. These - * instructions are only available for 32-bit applications - * running on ARMv8 CPU. - * - * If you want to tell the compiler to generate code that targets one of - * the feature set above, you should probably use one of the following - * flags (for more details, see technical note at the end of this file): - * - * -mfpu=vfp - * -mfpu=vfpv2 - * These are equivalent and tell GCC to use VFPv2 instructions for - * floating-point operations. Use this if you want your code to - * run on *some* ARMv6 devices, and any ARMv7-A device supported - * by Android. - * - * Generated code requires VFPv2 feature. - * - * -mfpu=vfpv3-d16 - * Tell GCC to use VFPv3 instructions (using only 16 FPU registers). - * This should be generic code that runs on any CPU that supports the - * 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this. - * - * Generated code requires VFPv3 feature. - * - * -mfpu=vfpv3 - * Tell GCC to use VFPv3 instructions with 32 FPU registers. - * Generated code requires VFPv3|VFP_D32 features. - * - * -mfpu=neon - * Tell GCC to use VFPv3 instructions with 32 FPU registers, and - * also support NEON intrinsics (see ). - * Generated code requires VFPv3|VFP_D32|NEON features. - * - * -mfpu=vfpv4-d16 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA features. - * - * -mfpu=vfpv4 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features. - * - * -mfpu=neon-vfpv4 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA - * features. - * - * -mcpu=cortex-a7 - * -mcpu=cortex-a15 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32| - * NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2 - * This flag implies -mfpu=neon-vfpv4. - * - * -mcpu=iwmmxt - * Allows the use of iWMMXt instrinsics with GCC. - * - * IMPORTANT NOTE: These flags should only be tested when - * android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM, i.e. this is a - * 32-bit process. - * - * When running a 64-bit ARM process on an ARMv8 CPU, - * android_getCpuFeatures() will return a different set of bitflags - */ -enum { - ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0), - ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1), - ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2), - ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3), - ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4), - ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5), - ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6), - ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7), - ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8), - ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9), - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10), - ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11), - ANDROID_CPU_ARM_FEATURE_AES = (1 << 12), - ANDROID_CPU_ARM_FEATURE_PMULL = (1 << 13), - ANDROID_CPU_ARM_FEATURE_SHA1 = (1 << 14), - ANDROID_CPU_ARM_FEATURE_SHA2 = (1 << 15), - ANDROID_CPU_ARM_FEATURE_CRC32 = (1 << 16), -}; - -/* The bit flags corresponding to the output of android_getCpuFeatures() - * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM64. Value details - * are: - * - * FP: - * CPU has Floating-point unit. - * - * ASIMD: - * CPU has Advanced SIMD unit. - * - * AES: - * CPU supports AES instructions. - * - * CRC32: - * CPU supports CRC32 instructions. - * - * SHA2: - * CPU supports SHA2 instructions. - * - * SHA1: - * CPU supports SHA1 instructions. - * - * PMULL: - * CPU supports 64-bit PMULL and PMULL2 instructions. - */ -enum { - ANDROID_CPU_ARM64_FEATURE_FP = (1 << 0), - ANDROID_CPU_ARM64_FEATURE_ASIMD = (1 << 1), - ANDROID_CPU_ARM64_FEATURE_AES = (1 << 2), - ANDROID_CPU_ARM64_FEATURE_PMULL = (1 << 3), - ANDROID_CPU_ARM64_FEATURE_SHA1 = (1 << 4), - ANDROID_CPU_ARM64_FEATURE_SHA2 = (1 << 5), - ANDROID_CPU_ARM64_FEATURE_CRC32 = (1 << 6), -}; - -/* The bit flags corresponding to the output of android_getCpuFeatures() - * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_X86 or - * ANDROID_CPU_FAMILY_X86_64. - */ -enum { - ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0), - ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1), - ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2), - ANDROID_CPU_X86_FEATURE_SSE4_1 = (1 << 3), - ANDROID_CPU_X86_FEATURE_SSE4_2 = (1 << 4), - ANDROID_CPU_X86_FEATURE_AES_NI = (1 << 5), - ANDROID_CPU_X86_FEATURE_AVX = (1 << 6), - ANDROID_CPU_X86_FEATURE_RDRAND = (1 << 7), - ANDROID_CPU_X86_FEATURE_AVX2 = (1 << 8), - ANDROID_CPU_X86_FEATURE_SHA_NI = (1 << 9), -}; - -/* The bit flags corresponding to the output of android_getCpuFeatures() - * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_MIPS - * or ANDROID_CPU_FAMILY_MIPS64. Values are: - * - * R6: - * CPU executes MIPS Release 6 instructions natively, and - * supports obsoleted R1..R5 instructions only via kernel traps. - * - * MSA: - * CPU supports Mips SIMD Architecture instructions. - */ -enum { - ANDROID_CPU_MIPS_FEATURE_R6 = (1 << 0), - ANDROID_CPU_MIPS_FEATURE_MSA = (1 << 1), -}; - - -/* Return the number of CPU cores detected on this device. */ -extern int android_getCpuCount(void); - -/* The following is used to force the CPU count and features - * mask in sandboxed processes. Under 4.1 and higher, these processes - * cannot access /proc, which is the only way to get information from - * the kernel about the current hardware (at least on ARM). - * - * It _must_ be called only once, and before any android_getCpuXXX - * function, any other case will fail. - * - * This function return 1 on success, and 0 on failure. - */ -extern int android_setCpu(int cpu_count, - uint64_t cpu_features); - -#ifdef __arm__ -/* Retrieve the ARM 32-bit CPUID value from the kernel. - * Note that this cannot work on sandboxed processes under 4.1 and - * higher, unless you called android_setCpuArm() before. - */ -extern uint32_t android_getCpuIdArm(void); - -/* An ARM-specific variant of android_setCpu() that also allows you - * to set the ARM CPUID field. - */ -extern int android_setCpuArm(int cpu_count, - uint64_t cpu_features, - uint32_t cpu_id); -#endif - -__END_DECLS - -#endif /* CPU_FEATURES_H */ diff --git a/thirdparty/libvpx/third_party/x86inc/LICENSE b/thirdparty/libvpx/third_party/x86inc/LICENSE deleted file mode 100644 index 7d07645a17..0000000000 --- a/thirdparty/libvpx/third_party/x86inc/LICENSE +++ /dev/null @@ -1,18 +0,0 @@ -Copyright (C) 2005-2012 x264 project - -Authors: Loren Merritt - Anton Mitrofanov - Jason Garrett-Glaser - Henrik Gramner - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/thirdparty/libvpx/third_party/x86inc/README.libvpx b/thirdparty/libvpx/third_party/x86inc/README.libvpx deleted file mode 100644 index 8d3cd966da..0000000000 --- a/thirdparty/libvpx/third_party/x86inc/README.libvpx +++ /dev/null @@ -1,20 +0,0 @@ -URL: https://git.videolan.org/git/x264.git -Version: d23d18655249944c1ca894b451e2c82c7a584c62 -License: ISC -License File: LICENSE - -Description: -x264/libav's framework for x86 assembly. Contains a variety of macros and -defines that help automatically allow assembly to work cross-platform. - -Local Modifications: -Get configuration from vpx_config.asm. -Prefix functions with vpx by default. -Manage name mangling (prefixing with '_') manually because 'PREFIX' does not - exist in libvpx. -Expand PIC default to macho64 and respect CONFIG_PIC from libvpx -Set 'private_extern' visibility for macho targets. -Copy PIC 'GLOBAL' macros from x86_abi_support.asm -Use .text instead of .rodata on macho to avoid broken tables in PIC mode. -Use .text with no alignment for aout -Only use 'hidden' visibility with Chromium diff --git a/thirdparty/libvpx/third_party/x86inc/x86inc.asm b/thirdparty/libvpx/third_party/x86inc/x86inc.asm deleted file mode 100644 index b647dff2f8..0000000000 --- a/thirdparty/libvpx/third_party/x86inc/x86inc.asm +++ /dev/null @@ -1,1649 +0,0 @@ -;***************************************************************************** -;* x86inc.asm: x264asm abstraction layer -;***************************************************************************** -;* Copyright (C) 2005-2016 x264 project -;* -;* Authors: Loren Merritt -;* Anton Mitrofanov -;* Fiona Glaser -;* Henrik Gramner -;* -;* Permission to use, copy, modify, and/or distribute this software for any -;* purpose with or without fee is hereby granted, provided that the above -;* copyright notice and this permission notice appear in all copies. -;* -;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -;***************************************************************************** - -; This is a header file for the x264ASM assembly language, which uses -; NASM/YASM syntax combined with a large number of macros to provide easy -; abstraction between different calling conventions (x86_32, win64, linux64). -; It also has various other useful features to simplify writing the kind of -; DSP functions that are most often used in x264. - -; Unlike the rest of x264, this file is available under an ISC license, as it -; has significant usefulness outside of x264 and we want it to be available -; to the largest audience possible. Of course, if you modify it for your own -; purposes to add a new feature, we strongly encourage contributing a patch -; as this feature might be useful for others as well. Send patches or ideas -; to x264-devel@videolan.org . - -%include "vpx_config.asm" - -%ifndef private_prefix - %define private_prefix vpx -%endif - -%ifndef public_prefix - %define public_prefix private_prefix -%endif - -%ifndef STACK_ALIGNMENT - %if ARCH_X86_64 - %define STACK_ALIGNMENT 16 - %else - %define STACK_ALIGNMENT 4 - %endif -%endif - -%define WIN64 0 -%define UNIX64 0 -%if ARCH_X86_64 - %ifidn __OUTPUT_FORMAT__,win32 - %define WIN64 1 - %elifidn __OUTPUT_FORMAT__,win64 - %define WIN64 1 - %elifidn __OUTPUT_FORMAT__,x64 - %define WIN64 1 - %else - %define UNIX64 1 - %endif -%endif - -%define FORMAT_ELF 0 -%ifidn __OUTPUT_FORMAT__,elf - %define FORMAT_ELF 1 -%elifidn __OUTPUT_FORMAT__,elf32 - %define FORMAT_ELF 1 -%elifidn __OUTPUT_FORMAT__,elf64 - %define FORMAT_ELF 1 -%endif - -%define FORMAT_MACHO 0 -%ifidn __OUTPUT_FORMAT__,macho32 - %define FORMAT_MACHO 1 -%elifidn __OUTPUT_FORMAT__,macho64 - %define FORMAT_MACHO 1 -%endif - -; Set PREFIX for libvpx builds. -%if FORMAT_ELF - %undef PREFIX -%elif WIN64 - %undef PREFIX -%else - %define PREFIX -%endif - -%ifdef PREFIX - %define mangle(x) _ %+ x -%else - %define mangle(x) x -%endif - -; In some instances macho32 tables get misaligned when using .rodata. -; When looking at the disassembly it appears that the offset is either -; correct or consistently off by 90. Placing them in the .text section -; works around the issue. It appears to be specific to the way libvpx -; handles the tables. -%macro SECTION_RODATA 0-1 16 - %ifidn __OUTPUT_FORMAT__,macho32 - SECTION .text align=%1 - fakegot: - %elifidn __OUTPUT_FORMAT__,aout - SECTION .text - %else - SECTION .rodata align=%1 - %endif -%endmacro - -; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC" -; from original code is added in for 64bit. -%ifidn __OUTPUT_FORMAT__,elf32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,macho32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,win32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,aout -%define ABI_IS_32BIT 1 -%else -%define ABI_IS_32BIT 0 -%endif - -%if ABI_IS_32BIT - %if CONFIG_PIC=1 - %ifidn __OUTPUT_FORMAT__,elf32 - %define GET_GOT_DEFINED 1 - %define WRT_PLT wrt ..plt - %macro GET_GOT 1 - extern _GLOBAL_OFFSET_TABLE_ - push %1 - call %%get_got - %%sub_offset: - jmp %%exitGG - %%get_got: - mov %1, [esp] - add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc - ret - %%exitGG: - %undef GLOBAL - %define GLOBAL(x) x + %1 wrt ..gotoff - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %elifidn __OUTPUT_FORMAT__,macho32 - %define GET_GOT_DEFINED 1 - %macro GET_GOT 1 - push %1 - call %%get_got - %%get_got: - pop %1 - %undef GLOBAL - %define GLOBAL(x) x + %1 - %%get_got - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %else - %define GET_GOT_DEFINED 0 - %endif - %endif - - %if ARCH_X86_64 == 0 - %undef PIC - %endif - -%else - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) rel x - %define WRT_PLT wrt ..plt - - %if WIN64 - %define PIC - %elifidn __OUTPUT_FORMAT__,macho64 - %define PIC - %elif CONFIG_PIC - %define PIC - %endif -%endif - -%ifnmacro GET_GOT - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) x -%endif -%ifndef RESTORE_GOT - %define RESTORE_GOT -%endif -%ifndef WRT_PLT - %define WRT_PLT -%endif - -%ifdef PIC - default rel -%endif - -%ifndef GET_GOT_DEFINED - %define GET_GOT_DEFINED 0 -%endif -; Done with PIC macros - -%ifdef __NASM_VER__ - %use smartalign -%endif - -; Macros to eliminate most code duplication between x86_32 and x86_64: -; Currently this works only for leaf functions which load all their arguments -; into registers at the start, and make no other use of the stack. Luckily that -; covers most of x264's asm. - -; PROLOGUE: -; %1 = number of arguments. loads them from stack if needed. -; %2 = number of registers used. pushes callee-saved regs if needed. -; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed. -; %4 = (optional) stack size to be allocated. The stack will be aligned before -; allocating the specified stack size. If the required stack alignment is -; larger than the known stack alignment the stack will be manually aligned -; and an extra register will be allocated to hold the original stack -; pointer (to not invalidate r0m etc.). To prevent the use of an extra -; register as stack pointer, request a negative stack size. -; %4+/%5+ = list of names to define to registers -; PROLOGUE can also be invoked by adding the same options to cglobal - -; e.g. -; cglobal foo, 2,3,7,0x40, dst, src, tmp -; declares a function (foo) that automatically loads two arguments (dst and -; src) into registers, uses one additional register (tmp) plus 7 vector -; registers (m0-m6) and allocates 0x40 bytes of stack space. - -; TODO Some functions can use some args directly from the stack. If they're the -; last args then you can just not declare them, but if they're in the middle -; we need more flexible macro. - -; RET: -; Pops anything that was pushed by PROLOGUE, and returns. - -; REP_RET: -; Use this instead of RET if it's a branch target. - -; registers: -; rN and rNq are the native-size register holding function argument N -; rNd, rNw, rNb are dword, word, and byte size -; rNh is the high 8 bits of the word size -; rNm is the original location of arg N (a register or on the stack), dword -; rNmp is native size - -%macro DECLARE_REG 2-3 - %define r%1q %2 - %define r%1d %2d - %define r%1w %2w - %define r%1b %2b - %define r%1h %2h - %define %2q %2 - %if %0 == 2 - %define r%1m %2d - %define r%1mp %2 - %elif ARCH_X86_64 ; memory - %define r%1m [rstk + stack_offset + %3] - %define r%1mp qword r %+ %1 %+ m - %else - %define r%1m [rstk + stack_offset + %3] - %define r%1mp dword r %+ %1 %+ m - %endif - %define r%1 %2 -%endmacro - -%macro DECLARE_REG_SIZE 3 - %define r%1q r%1 - %define e%1q r%1 - %define r%1d e%1 - %define e%1d e%1 - %define r%1w %1 - %define e%1w %1 - %define r%1h %3 - %define e%1h %3 - %define r%1b %2 - %define e%1b %2 - %if ARCH_X86_64 == 0 - %define r%1 e%1 - %endif -%endmacro - -DECLARE_REG_SIZE ax, al, ah -DECLARE_REG_SIZE bx, bl, bh -DECLARE_REG_SIZE cx, cl, ch -DECLARE_REG_SIZE dx, dl, dh -DECLARE_REG_SIZE si, sil, null -DECLARE_REG_SIZE di, dil, null -DECLARE_REG_SIZE bp, bpl, null - -; t# defines for when per-arch register allocation is more complex than just function arguments - -%macro DECLARE_REG_TMP 1-* - %assign %%i 0 - %rep %0 - CAT_XDEFINE t, %%i, r%1 - %assign %%i %%i+1 - %rotate 1 - %endrep -%endmacro - -%macro DECLARE_REG_TMP_SIZE 0-* - %rep %0 - %define t%1q t%1 %+ q - %define t%1d t%1 %+ d - %define t%1w t%1 %+ w - %define t%1h t%1 %+ h - %define t%1b t%1 %+ b - %rotate 1 - %endrep -%endmacro - -DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 - -%if ARCH_X86_64 - %define gprsize 8 -%else - %define gprsize 4 -%endif - -%macro PUSH 1 - push %1 - %ifidn rstk, rsp - %assign stack_offset stack_offset+gprsize - %endif -%endmacro - -%macro POP 1 - pop %1 - %ifidn rstk, rsp - %assign stack_offset stack_offset-gprsize - %endif -%endmacro - -%macro PUSH_IF_USED 1-* - %rep %0 - %if %1 < regs_used - PUSH r%1 - %endif - %rotate 1 - %endrep -%endmacro - -%macro POP_IF_USED 1-* - %rep %0 - %if %1 < regs_used - pop r%1 - %endif - %rotate 1 - %endrep -%endmacro - -%macro LOAD_IF_USED 1-* - %rep %0 - %if %1 < num_args - mov r%1, r %+ %1 %+ mp - %endif - %rotate 1 - %endrep -%endmacro - -%macro SUB 2 - sub %1, %2 - %ifidn %1, rstk - %assign stack_offset stack_offset+(%2) - %endif -%endmacro - -%macro ADD 2 - add %1, %2 - %ifidn %1, rstk - %assign stack_offset stack_offset-(%2) - %endif -%endmacro - -%macro movifnidn 2 - %ifnidn %1, %2 - mov %1, %2 - %endif -%endmacro - -%macro movsxdifnidn 2 - %ifnidn %1, %2 - movsxd %1, %2 - %endif -%endmacro - -%macro ASSERT 1 - %if (%1) == 0 - %error assertion ``%1'' failed - %endif -%endmacro - -%macro DEFINE_ARGS 0-* - %ifdef n_arg_names - %assign %%i 0 - %rep n_arg_names - CAT_UNDEF arg_name %+ %%i, q - CAT_UNDEF arg_name %+ %%i, d - CAT_UNDEF arg_name %+ %%i, w - CAT_UNDEF arg_name %+ %%i, h - CAT_UNDEF arg_name %+ %%i, b - CAT_UNDEF arg_name %+ %%i, m - CAT_UNDEF arg_name %+ %%i, mp - CAT_UNDEF arg_name, %%i - %assign %%i %%i+1 - %endrep - %endif - - %xdefine %%stack_offset stack_offset - %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine - %assign %%i 0 - %rep %0 - %xdefine %1q r %+ %%i %+ q - %xdefine %1d r %+ %%i %+ d - %xdefine %1w r %+ %%i %+ w - %xdefine %1h r %+ %%i %+ h - %xdefine %1b r %+ %%i %+ b - %xdefine %1m r %+ %%i %+ m - %xdefine %1mp r %+ %%i %+ mp - CAT_XDEFINE arg_name, %%i, %1 - %assign %%i %%i+1 - %rotate 1 - %endrep - %xdefine stack_offset %%stack_offset - %assign n_arg_names %0 -%endmacro - -%define required_stack_alignment ((mmsize + 15) & ~15) - -%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) - %ifnum %1 - %if %1 != 0 - %assign %%pad 0 - %assign stack_size %1 - %if stack_size < 0 - %assign stack_size -stack_size - %endif - %if WIN64 - %assign %%pad %%pad + 32 ; shadow space - %if mmsize != 8 - %assign xmm_regs_used %2 - %if xmm_regs_used > 8 - %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers - %endif - %endif - %endif - %if required_stack_alignment <= STACK_ALIGNMENT - ; maintain the current stack alignment - %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) - SUB rsp, stack_size_padded - %else - %assign %%reg_num (regs_used - 1) - %xdefine rstk r %+ %%reg_num - ; align stack, and save original stack location directly above - ; it, i.e. in [rsp+stack_size_padded], so we can restore the - ; stack in a single instruction (i.e. mov rsp, rstk or mov - ; rsp, [rsp+stack_size_padded]) - %if %1 < 0 ; need to store rsp on stack - %xdefine rstkm [rsp + stack_size + %%pad] - %assign %%pad %%pad + gprsize - %else ; can keep rsp in rstk during whole function - %xdefine rstkm rstk - %endif - %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1)) - mov rstk, rsp - and rsp, ~(required_stack_alignment-1) - sub rsp, stack_size_padded - movifnidn rstkm, rstk - %endif - WIN64_PUSH_XMM - %endif - %endif -%endmacro - -%macro SETUP_STACK_POINTER 1 - %ifnum %1 - %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT - %if %1 > 0 - %assign regs_used (regs_used + 1) - %endif - %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 - ; Ensure that we don't clobber any registers containing arguments - %assign regs_used 5 + UNIX64 * 3 - %endif - %endif - %endif -%endmacro - -%macro DEFINE_ARGS_INTERNAL 3+ - %ifnum %2 - DEFINE_ARGS %3 - %elif %1 == 4 - DEFINE_ARGS %2 - %elif %1 > 4 - DEFINE_ARGS %2, %3 - %endif -%endmacro - -%if WIN64 ; Windows x64 ;================================================= - -DECLARE_REG 0, rcx -DECLARE_REG 1, rdx -DECLARE_REG 2, R8 -DECLARE_REG 3, R9 -DECLARE_REG 4, R10, 40 -DECLARE_REG 5, R11, 48 -DECLARE_REG 6, rax, 56 -DECLARE_REG 7, rdi, 64 -DECLARE_REG 8, rsi, 72 -DECLARE_REG 9, rbx, 80 -DECLARE_REG 10, rbp, 88 -DECLARE_REG 11, R12, 96 -DECLARE_REG 12, R13, 104 -DECLARE_REG 13, R14, 112 -DECLARE_REG 14, R15, 120 - -%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... - %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - SETUP_STACK_POINTER %4 - ASSERT regs_used <= 15 - PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 - ALLOC_STACK %4, %3 - %if mmsize != 8 && stack_size == 0 - WIN64_SPILL_XMM %3 - %endif - LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS_INTERNAL %0, %4, %5 -%endmacro - -%macro WIN64_PUSH_XMM 0 - ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated. - %if xmm_regs_used > 6 - movaps [rstk + stack_offset + 8], xmm6 - %endif - %if xmm_regs_used > 7 - movaps [rstk + stack_offset + 24], xmm7 - %endif - %if xmm_regs_used > 8 - %assign %%i 8 - %rep xmm_regs_used-8 - movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i - %assign %%i %%i+1 - %endrep - %endif -%endmacro - -%macro WIN64_SPILL_XMM 1 - %assign xmm_regs_used %1 - ASSERT xmm_regs_used <= 16 - %if xmm_regs_used > 8 - ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack. - %assign %%pad (xmm_regs_used-8)*16 + 32 - %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) - SUB rsp, stack_size_padded - %endif - WIN64_PUSH_XMM -%endmacro - -%macro WIN64_RESTORE_XMM_INTERNAL 1 - %assign %%pad_size 0 - %if xmm_regs_used > 8 - %assign %%i xmm_regs_used - %rep xmm_regs_used-8 - %assign %%i %%i-1 - movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32] - %endrep - %endif - %if stack_size_padded > 0 - %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm - %else - add %1, stack_size_padded - %assign %%pad_size stack_size_padded - %endif - %endif - %if xmm_regs_used > 7 - movaps xmm7, [%1 + stack_offset - %%pad_size + 24] - %endif - %if xmm_regs_used > 6 - movaps xmm6, [%1 + stack_offset - %%pad_size + 8] - %endif -%endmacro - -%macro WIN64_RESTORE_XMM 1 - WIN64_RESTORE_XMM_INTERNAL %1 - %assign stack_offset (stack_offset-stack_size_padded) - %assign xmm_regs_used 0 -%endmacro - -%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0 - -%macro RET 0 - WIN64_RESTORE_XMM_INTERNAL rsp - POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 - %if mmsize == 32 - vzeroupper - %endif - AUTO_REP_RET -%endmacro - -%elif ARCH_X86_64 ; *nix x64 ;============================================= - -DECLARE_REG 0, rdi -DECLARE_REG 1, rsi -DECLARE_REG 2, rdx -DECLARE_REG 3, rcx -DECLARE_REG 4, R8 -DECLARE_REG 5, R9 -DECLARE_REG 6, rax, 8 -DECLARE_REG 7, R10, 16 -DECLARE_REG 8, R11, 24 -DECLARE_REG 9, rbx, 32 -DECLARE_REG 10, rbp, 40 -DECLARE_REG 11, R12, 48 -DECLARE_REG 12, R13, 56 -DECLARE_REG 13, R14, 64 -DECLARE_REG 14, R15, 72 - -%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... - %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - SETUP_STACK_POINTER %4 - ASSERT regs_used <= 15 - PUSH_IF_USED 9, 10, 11, 12, 13, 14 - ALLOC_STACK %4 - LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS_INTERNAL %0, %4, %5 -%endmacro - -%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0 - -%macro RET 0 - %if stack_size_padded > 0 - %if required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm - %else - add rsp, stack_size_padded - %endif - %endif - POP_IF_USED 14, 13, 12, 11, 10, 9 - %if mmsize == 32 - vzeroupper - %endif - AUTO_REP_RET -%endmacro - -%else ; X86_32 ;============================================================== - -DECLARE_REG 0, eax, 4 -DECLARE_REG 1, ecx, 8 -DECLARE_REG 2, edx, 12 -DECLARE_REG 3, ebx, 16 -DECLARE_REG 4, esi, 20 -DECLARE_REG 5, edi, 24 -DECLARE_REG 6, ebp, 28 -%define rsp esp - -%macro DECLARE_ARG 1-* - %rep %0 - %define r%1m [rstk + stack_offset + 4*%1 + 4] - %define r%1mp dword r%1m - %rotate 1 - %endrep -%endmacro - -DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 - -%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... - %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - %if num_args > 7 - %assign num_args 7 - %endif - %if regs_used > 7 - %assign regs_used 7 - %endif - SETUP_STACK_POINTER %4 - ASSERT regs_used <= 7 - PUSH_IF_USED 3, 4, 5, 6 - ALLOC_STACK %4 - LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 - DEFINE_ARGS_INTERNAL %0, %4, %5 -%endmacro - -%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0 - -%macro RET 0 - %if stack_size_padded > 0 - %if required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm - %else - add rsp, stack_size_padded - %endif - %endif - POP_IF_USED 6, 5, 4, 3 - %if mmsize == 32 - vzeroupper - %endif - AUTO_REP_RET -%endmacro - -%endif ;====================================================================== - -%if WIN64 == 0 - %macro WIN64_SPILL_XMM 1 - %endmacro - %macro WIN64_RESTORE_XMM 1 - %endmacro - %macro WIN64_PUSH_XMM 0 - %endmacro -%endif - -; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either -; a branch or a branch target. So switch to a 2-byte form of ret in that case. -; We can automatically detect "follows a branch", but not a branch target. -; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.) -%macro REP_RET 0 - %if has_epilogue - RET - %else - rep ret - %endif - annotate_function_size -%endmacro - -%define last_branch_adr $$ -%macro AUTO_REP_RET 0 - %if notcpuflag(ssse3) - times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr. - %endif - ret - annotate_function_size -%endmacro - -%macro BRANCH_INSTR 0-* - %rep %0 - %macro %1 1-2 %1 - %2 %1 - %if notcpuflag(ssse3) - %%branch_instr equ $ - %xdefine last_branch_adr %%branch_instr - %endif - %endmacro - %rotate 1 - %endrep -%endmacro - -BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp - -%macro TAIL_CALL 2 ; callee, is_nonadjacent - %if has_epilogue - call %1 - RET - %elif %2 - jmp %1 - %endif - annotate_function_size -%endmacro - -;============================================================================= -; arch-independent part -;============================================================================= - -%assign function_align 16 - -; Begin a function. -; Applies any symbol mangling needed for C linkage, and sets up a define such that -; subsequent uses of the function name automatically refer to the mangled version. -; Appends cpuflags to the function name if cpuflags has been specified. -; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX -; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2). -%macro cglobal 1-2+ "" ; name, [PROLOGUE args] - cglobal_internal 1, %1 %+ SUFFIX, %2 -%endmacro -%macro cvisible 1-2+ "" ; name, [PROLOGUE args] - cglobal_internal 0, %1 %+ SUFFIX, %2 -%endmacro -%macro cglobal_internal 2-3+ - annotate_function_size - %if %1 - %xdefine %%FUNCTION_PREFIX private_prefix - ; libvpx explicitly sets visibility in shared object builds. Avoid - ; setting visibility to hidden as it may break builds that split - ; sources on e.g., directory boundaries. - %ifdef CHROMIUM - %xdefine %%VISIBILITY hidden - %else - %xdefine %%VISIBILITY - %endif - %else - %xdefine %%FUNCTION_PREFIX public_prefix - %xdefine %%VISIBILITY - %endif - %ifndef cglobaled_%2 - %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2) - %xdefine %2.skip_prologue %2 %+ .skip_prologue - CAT_XDEFINE cglobaled_, %2, 1 - %endif - %xdefine current_function %2 - %xdefine current_function_section __SECT__ - %if FORMAT_ELF - global %2:function %%VISIBILITY - %elif FORMAT_MACHO - %ifdef __NASM_VER__ - global %2 - %else - global %2:private_extern - %endif - %else - global %2 - %endif - align function_align - %2: - RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer - %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required - %assign stack_offset 0 ; stack pointer offset relative to the return address - %assign stack_size 0 ; amount of stack space that can be freely used inside a function - %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding - %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 - %ifnidn %3, "" - PROLOGUE %3 - %endif -%endmacro - -%macro cextern 1 - %xdefine %1 mangle(private_prefix %+ _ %+ %1) - CAT_XDEFINE cglobaled_, %1, 1 - extern %1 -%endmacro - -; like cextern, but without the prefix -%macro cextern_naked 1 - %ifdef PREFIX - %xdefine %1 mangle(%1) - %endif - CAT_XDEFINE cglobaled_, %1, 1 - extern %1 -%endmacro - -%macro const 1-2+ - %xdefine %1 mangle(private_prefix %+ _ %+ %1) - %if FORMAT_ELF - global %1:data hidden - %else - global %1 - %endif - %1: %2 -%endmacro - -; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. -%if FORMAT_ELF - [SECTION .note.GNU-stack noalloc noexec nowrite progbits] -%endif - -; Tell debuggers how large the function was. -; This may be invoked multiple times per function; we rely on later instances overriding earlier ones. -; This is invoked by RET and similar macros, and also cglobal does it for the previous function, -; but if the last function in a source file doesn't use any of the standard macros for its epilogue, -; then its size might be unspecified. -%macro annotate_function_size 0 - %ifdef __YASM_VER__ - %ifdef current_function - %if FORMAT_ELF - current_function_section - %%ecf equ $ - size current_function %%ecf - current_function - __SECT__ - %endif - %endif - %endif -%endmacro - -; cpuflags - -%assign cpuflags_mmx (1<<0) -%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx -%assign cpuflags_3dnow (1<<2) | cpuflags_mmx -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow -%assign cpuflags_sse (1<<4) | cpuflags_mmx2 -%assign cpuflags_sse2 (1<<5) | cpuflags_sse -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 -%assign cpuflags_sse3 (1<<7) | cpuflags_sse2 -%assign cpuflags_ssse3 (1<<8) | cpuflags_sse3 -%assign cpuflags_sse4 (1<<9) | cpuflags_ssse3 -%assign cpuflags_sse42 (1<<10)| cpuflags_sse4 -%assign cpuflags_avx (1<<11)| cpuflags_sse42 -%assign cpuflags_xop (1<<12)| cpuflags_avx -%assign cpuflags_fma4 (1<<13)| cpuflags_avx -%assign cpuflags_fma3 (1<<14)| cpuflags_avx -%assign cpuflags_avx2 (1<<15)| cpuflags_fma3 - -%assign cpuflags_cache32 (1<<16) -%assign cpuflags_cache64 (1<<17) -%assign cpuflags_slowctz (1<<18) -%assign cpuflags_lzcnt (1<<19) -%assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<21) -%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt -%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1 - -; Returns a boolean value expressing whether or not the specified cpuflag is enabled. -%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) -%define notcpuflag(x) (cpuflag(x) ^ 1) - -; Takes an arbitrary number of cpuflags from the above list. -; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. -; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. -%macro INIT_CPUFLAGS 0-* - %xdefine SUFFIX - %undef cpuname - %assign cpuflags 0 - - %if %0 >= 1 - %rep %0 - %ifdef cpuname - %xdefine cpuname cpuname %+ _%1 - %else - %xdefine cpuname %1 - %endif - %assign cpuflags cpuflags | cpuflags_%1 - %rotate 1 - %endrep - %xdefine SUFFIX _ %+ cpuname - - %if cpuflag(avx) - %assign avx_enabled 1 - %endif - %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2)) - %define mova movaps - %define movu movups - %define movnta movntps - %endif - %if cpuflag(aligned) - %define movu mova - %elif cpuflag(sse3) && notcpuflag(ssse3) - %define movu lddqu - %endif - %endif - - %if ARCH_X86_64 || cpuflag(sse2) - %ifdef __NASM_VER__ - ALIGNMODE k8 - %else - CPU amdnop - %endif - %else - %ifdef __NASM_VER__ - ALIGNMODE nop - %else - CPU basicnop - %endif - %endif -%endmacro - -; Merge mmx and sse* -; m# is a simd register of the currently selected size -; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m# -; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m# -; (All 3 remain in sync through SWAP.) - -%macro CAT_XDEFINE 3 - %xdefine %1%2 %3 -%endmacro - -%macro CAT_UNDEF 2 - %undef %1%2 -%endmacro - -%macro INIT_MMX 0-1+ - %assign avx_enabled 0 - %define RESET_MM_PERMUTATION INIT_MMX %1 - %define mmsize 8 - %define num_mmregs 8 - %define mova movq - %define movu movq - %define movh movd - %define movnta movntq - %assign %%i 0 - %rep 8 - CAT_XDEFINE m, %%i, mm %+ %%i - CAT_XDEFINE nnmm, %%i, %%i - %assign %%i %%i+1 - %endrep - %rep 8 - CAT_UNDEF m, %%i - CAT_UNDEF nnmm, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -%macro INIT_XMM 0-1+ - %assign avx_enabled 0 - %define RESET_MM_PERMUTATION INIT_XMM %1 - %define mmsize 16 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 16 - %endif - %define mova movdqa - %define movu movdqu - %define movh movq - %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, xmm %+ %%i - CAT_XDEFINE nnxmm, %%i, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -%macro INIT_YMM 0-1+ - %assign avx_enabled 1 - %define RESET_MM_PERMUTATION INIT_YMM %1 - %define mmsize 32 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 16 - %endif - %define mova movdqa - %define movu movdqu - %undef movh - %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, ymm %+ %%i - CAT_XDEFINE nnymm, %%i, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -INIT_XMM - -%macro DECLARE_MMCAST 1 - %define mmmm%1 mm%1 - %define mmxmm%1 mm%1 - %define mmymm%1 mm%1 - %define xmmmm%1 mm%1 - %define xmmxmm%1 xmm%1 - %define xmmymm%1 xmm%1 - %define ymmmm%1 mm%1 - %define ymmxmm%1 xmm%1 - %define ymmymm%1 ymm%1 - %define xm%1 xmm %+ m%1 - %define ym%1 ymm %+ m%1 -%endmacro - -%assign i 0 -%rep 16 - DECLARE_MMCAST i - %assign i i+1 -%endrep - -; I often want to use macros that permute their arguments. e.g. there's no -; efficient way to implement butterfly or transpose or dct without swapping some -; arguments. -; -; I would like to not have to manually keep track of the permutations: -; If I insert a permutation in the middle of a function, it should automatically -; change everything that follows. For more complex macros I may also have multiple -; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations. -; -; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that -; permutes its arguments. It's equivalent to exchanging the contents of the -; registers, except that this way you exchange the register names instead, so it -; doesn't cost any cycles. - -%macro PERMUTE 2-* ; takes a list of pairs to swap - %rep %0/2 - %xdefine %%tmp%2 m%2 - %rotate 2 - %endrep - %rep %0/2 - %xdefine m%1 %%tmp%2 - CAT_XDEFINE nn, m%1, %1 - %rotate 2 - %endrep -%endmacro - -%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs) - %ifnum %1 ; SWAP 0, 1, ... - SWAP_INTERNAL_NUM %1, %2 - %else ; SWAP m0, m1, ... - SWAP_INTERNAL_NAME %1, %2 - %endif -%endmacro - -%macro SWAP_INTERNAL_NUM 2-* - %rep %0-1 - %xdefine %%tmp m%1 - %xdefine m%1 m%2 - %xdefine m%2 %%tmp - CAT_XDEFINE nn, m%1, %1 - CAT_XDEFINE nn, m%2, %2 - %rotate 1 - %endrep -%endmacro - -%macro SWAP_INTERNAL_NAME 2-* - %xdefine %%args nn %+ %1 - %rep %0-1 - %xdefine %%args %%args, nn %+ %2 - %rotate 1 - %endrep - SWAP_INTERNAL_NUM %%args -%endmacro - -; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later -; calls to that function will automatically load the permutation, so values can -; be returned in mmregs. -%macro SAVE_MM_PERMUTATION 0-1 - %if %0 - %xdefine %%f %1_m - %else - %xdefine %%f current_function %+ _m - %endif - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE %%f, %%i, m %+ %%i - %assign %%i %%i+1 - %endrep -%endmacro - -%macro LOAD_MM_PERMUTATION 1 ; name to load from - %ifdef %1_m0 - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, %1_m %+ %%i - CAT_XDEFINE nn, m %+ %%i, %%i - %assign %%i %%i+1 - %endrep - %endif -%endmacro - -; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't -%macro call 1 - call_internal %1 %+ SUFFIX, %1 -%endmacro -%macro call_internal 2 - %xdefine %%i %2 - %ifndef cglobaled_%2 - %ifdef cglobaled_%1 - %xdefine %%i %1 - %endif - %endif - call %%i - LOAD_MM_PERMUTATION %%i -%endmacro - -; Substitutions that reduce instruction size but are functionally equivalent -%macro add 2 - %ifnum %2 - %if %2==128 - sub %1, -128 - %else - add %1, %2 - %endif - %else - add %1, %2 - %endif -%endmacro - -%macro sub 2 - %ifnum %2 - %if %2==128 - add %1, -128 - %else - sub %1, %2 - %endif - %else - sub %1, %2 - %endif -%endmacro - -;============================================================================= -; AVX abstraction layer -;============================================================================= - -%assign i 0 -%rep 16 - %if i < 8 - CAT_XDEFINE sizeofmm, i, 8 - %endif - CAT_XDEFINE sizeofxmm, i, 16 - CAT_XDEFINE sizeofymm, i, 32 - %assign i i+1 -%endrep -%undef i - -%macro CHECK_AVX_INSTR_EMU 3-* - %xdefine %%opcode %1 - %xdefine %%dst %2 - %rep %0-2 - %ifidn %%dst, %3 - %error non-avx emulation of ``%%opcode'' is not supported - %endif - %rotate 1 - %endrep -%endmacro - -;%1 == instruction -;%2 == minimal instruction set -;%3 == 1 if float, 0 if int -;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise -;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not -;%6+: operands -%macro RUN_AVX_INSTR 6-9+ - %ifnum sizeof%7 - %assign __sizeofreg sizeof%7 - %elifnum sizeof%6 - %assign __sizeofreg sizeof%6 - %else - %assign __sizeofreg mmsize - %endif - %assign __emulate_avx 0 - %if avx_enabled && __sizeofreg >= 16 - %xdefine __instr v%1 - %else - %xdefine __instr %1 - %if %0 >= 8+%4 - %assign __emulate_avx 1 - %endif - %endif - %ifnidn %2, fnord - %ifdef cpuname - %if notcpuflag(%2) - %error use of ``%1'' %2 instruction in cpuname function: current_function - %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 - %error use of ``%1'' sse2 instruction in cpuname function: current_function - %endif - %endif - %endif - - %if __emulate_avx - %xdefine __src1 %7 - %xdefine __src2 %8 - %ifnidn %6, %7 - %if %0 >= 9 - CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9 - %else - CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8 - %endif - %if %5 && %4 == 0 - %ifnid %8 - ; 3-operand AVX instructions with a memory arg can only have it in src2, - ; whereas SSE emulation prefers to have it in src1 (i.e. the mov). - ; So, if the instruction is commutative with a memory arg, swap them. - %xdefine __src1 %8 - %xdefine __src2 %7 - %endif - %endif - %if __sizeofreg == 8 - MOVQ %6, __src1 - %elif %3 - MOVAPS %6, __src1 - %else - MOVDQA %6, __src1 - %endif - %endif - %if %0 >= 9 - %1 %6, __src2, %9 - %else - %1 %6, __src2 - %endif - %elif %0 >= 9 - __instr %6, %7, %8, %9 - %elif %0 == 8 - __instr %6, %7, %8 - %elif %0 == 7 - __instr %6, %7 - %else - __instr %6 - %endif -%endmacro - -;%1 == instruction -;%2 == minimal instruction set -;%3 == 1 if float, 0 if int -;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise -;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not -%macro AVX_INSTR 1-5 fnord, 0, 1, 0 - %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5 - %ifidn %2, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1 - %elifidn %3, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2 - %elifidn %4, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3 - %elifidn %5, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4 - %else - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5 - %endif - %endmacro -%endmacro - -; Instructions with both VEX and non-VEX encodings -; Non-destructive instructions are written without parameters -AVX_INSTR addpd, sse2, 1, 0, 1 -AVX_INSTR addps, sse, 1, 0, 1 -AVX_INSTR addsd, sse2, 1, 0, 1 -AVX_INSTR addss, sse, 1, 0, 1 -AVX_INSTR addsubpd, sse3, 1, 0, 0 -AVX_INSTR addsubps, sse3, 1, 0, 0 -AVX_INSTR aesdec, fnord, 0, 0, 0 -AVX_INSTR aesdeclast, fnord, 0, 0, 0 -AVX_INSTR aesenc, fnord, 0, 0, 0 -AVX_INSTR aesenclast, fnord, 0, 0, 0 -AVX_INSTR aesimc -AVX_INSTR aeskeygenassist -AVX_INSTR andnpd, sse2, 1, 0, 0 -AVX_INSTR andnps, sse, 1, 0, 0 -AVX_INSTR andpd, sse2, 1, 0, 1 -AVX_INSTR andps, sse, 1, 0, 1 -AVX_INSTR blendpd, sse4, 1, 0, 0 -AVX_INSTR blendps, sse4, 1, 0, 0 -AVX_INSTR blendvpd, sse4, 1, 0, 0 -AVX_INSTR blendvps, sse4, 1, 0, 0 -AVX_INSTR cmppd, sse2, 1, 1, 0 -AVX_INSTR cmpps, sse, 1, 1, 0 -AVX_INSTR cmpsd, sse2, 1, 1, 0 -AVX_INSTR cmpss, sse, 1, 1, 0 -AVX_INSTR comisd, sse2 -AVX_INSTR comiss, sse -AVX_INSTR cvtdq2pd, sse2 -AVX_INSTR cvtdq2ps, sse2 -AVX_INSTR cvtpd2dq, sse2 -AVX_INSTR cvtpd2ps, sse2 -AVX_INSTR cvtps2dq, sse2 -AVX_INSTR cvtps2pd, sse2 -AVX_INSTR cvtsd2si, sse2 -AVX_INSTR cvtsd2ss, sse2 -AVX_INSTR cvtsi2sd, sse2 -AVX_INSTR cvtsi2ss, sse -AVX_INSTR cvtss2sd, sse2 -AVX_INSTR cvtss2si, sse -AVX_INSTR cvttpd2dq, sse2 -AVX_INSTR cvttps2dq, sse2 -AVX_INSTR cvttsd2si, sse2 -AVX_INSTR cvttss2si, sse -AVX_INSTR divpd, sse2, 1, 0, 0 -AVX_INSTR divps, sse, 1, 0, 0 -AVX_INSTR divsd, sse2, 1, 0, 0 -AVX_INSTR divss, sse, 1, 0, 0 -AVX_INSTR dppd, sse4, 1, 1, 0 -AVX_INSTR dpps, sse4, 1, 1, 0 -AVX_INSTR extractps, sse4 -AVX_INSTR haddpd, sse3, 1, 0, 0 -AVX_INSTR haddps, sse3, 1, 0, 0 -AVX_INSTR hsubpd, sse3, 1, 0, 0 -AVX_INSTR hsubps, sse3, 1, 0, 0 -AVX_INSTR insertps, sse4, 1, 1, 0 -AVX_INSTR lddqu, sse3 -AVX_INSTR ldmxcsr, sse -AVX_INSTR maskmovdqu, sse2 -AVX_INSTR maxpd, sse2, 1, 0, 1 -AVX_INSTR maxps, sse, 1, 0, 1 -AVX_INSTR maxsd, sse2, 1, 0, 1 -AVX_INSTR maxss, sse, 1, 0, 1 -AVX_INSTR minpd, sse2, 1, 0, 1 -AVX_INSTR minps, sse, 1, 0, 1 -AVX_INSTR minsd, sse2, 1, 0, 1 -AVX_INSTR minss, sse, 1, 0, 1 -AVX_INSTR movapd, sse2 -AVX_INSTR movaps, sse -AVX_INSTR movd, mmx -AVX_INSTR movddup, sse3 -AVX_INSTR movdqa, sse2 -AVX_INSTR movdqu, sse2 -AVX_INSTR movhlps, sse, 1, 0, 0 -AVX_INSTR movhpd, sse2, 1, 0, 0 -AVX_INSTR movhps, sse, 1, 0, 0 -AVX_INSTR movlhps, sse, 1, 0, 0 -AVX_INSTR movlpd, sse2, 1, 0, 0 -AVX_INSTR movlps, sse, 1, 0, 0 -AVX_INSTR movmskpd, sse2 -AVX_INSTR movmskps, sse -AVX_INSTR movntdq, sse2 -AVX_INSTR movntdqa, sse4 -AVX_INSTR movntpd, sse2 -AVX_INSTR movntps, sse -AVX_INSTR movq, mmx -AVX_INSTR movsd, sse2, 1, 0, 0 -AVX_INSTR movshdup, sse3 -AVX_INSTR movsldup, sse3 -AVX_INSTR movss, sse, 1, 0, 0 -AVX_INSTR movupd, sse2 -AVX_INSTR movups, sse -AVX_INSTR mpsadbw, sse4 -AVX_INSTR mulpd, sse2, 1, 0, 1 -AVX_INSTR mulps, sse, 1, 0, 1 -AVX_INSTR mulsd, sse2, 1, 0, 1 -AVX_INSTR mulss, sse, 1, 0, 1 -AVX_INSTR orpd, sse2, 1, 0, 1 -AVX_INSTR orps, sse, 1, 0, 1 -AVX_INSTR pabsb, ssse3 -AVX_INSTR pabsd, ssse3 -AVX_INSTR pabsw, ssse3 -AVX_INSTR packsswb, mmx, 0, 0, 0 -AVX_INSTR packssdw, mmx, 0, 0, 0 -AVX_INSTR packuswb, mmx, 0, 0, 0 -AVX_INSTR packusdw, sse4, 0, 0, 0 -AVX_INSTR paddb, mmx, 0, 0, 1 -AVX_INSTR paddw, mmx, 0, 0, 1 -AVX_INSTR paddd, mmx, 0, 0, 1 -AVX_INSTR paddq, sse2, 0, 0, 1 -AVX_INSTR paddsb, mmx, 0, 0, 1 -AVX_INSTR paddsw, mmx, 0, 0, 1 -AVX_INSTR paddusb, mmx, 0, 0, 1 -AVX_INSTR paddusw, mmx, 0, 0, 1 -AVX_INSTR palignr, ssse3 -AVX_INSTR pand, mmx, 0, 0, 1 -AVX_INSTR pandn, mmx, 0, 0, 0 -AVX_INSTR pavgb, mmx2, 0, 0, 1 -AVX_INSTR pavgw, mmx2, 0, 0, 1 -AVX_INSTR pblendvb, sse4, 0, 0, 0 -AVX_INSTR pblendw, sse4 -AVX_INSTR pclmulqdq -AVX_INSTR pcmpestri, sse42 -AVX_INSTR pcmpestrm, sse42 -AVX_INSTR pcmpistri, sse42 -AVX_INSTR pcmpistrm, sse42 -AVX_INSTR pcmpeqb, mmx, 0, 0, 1 -AVX_INSTR pcmpeqw, mmx, 0, 0, 1 -AVX_INSTR pcmpeqd, mmx, 0, 0, 1 -AVX_INSTR pcmpeqq, sse4, 0, 0, 1 -AVX_INSTR pcmpgtb, mmx, 0, 0, 0 -AVX_INSTR pcmpgtw, mmx, 0, 0, 0 -AVX_INSTR pcmpgtd, mmx, 0, 0, 0 -AVX_INSTR pcmpgtq, sse42, 0, 0, 0 -AVX_INSTR pextrb, sse4 -AVX_INSTR pextrd, sse4 -AVX_INSTR pextrq, sse4 -AVX_INSTR pextrw, mmx2 -AVX_INSTR phaddw, ssse3, 0, 0, 0 -AVX_INSTR phaddd, ssse3, 0, 0, 0 -AVX_INSTR phaddsw, ssse3, 0, 0, 0 -AVX_INSTR phminposuw, sse4 -AVX_INSTR phsubw, ssse3, 0, 0, 0 -AVX_INSTR phsubd, ssse3, 0, 0, 0 -AVX_INSTR phsubsw, ssse3, 0, 0, 0 -AVX_INSTR pinsrb, sse4 -AVX_INSTR pinsrd, sse4 -AVX_INSTR pinsrq, sse4 -AVX_INSTR pinsrw, mmx2 -AVX_INSTR pmaddwd, mmx, 0, 0, 1 -AVX_INSTR pmaddubsw, ssse3, 0, 0, 0 -AVX_INSTR pmaxsb, sse4, 0, 0, 1 -AVX_INSTR pmaxsw, mmx2, 0, 0, 1 -AVX_INSTR pmaxsd, sse4, 0, 0, 1 -AVX_INSTR pmaxub, mmx2, 0, 0, 1 -AVX_INSTR pmaxuw, sse4, 0, 0, 1 -AVX_INSTR pmaxud, sse4, 0, 0, 1 -AVX_INSTR pminsb, sse4, 0, 0, 1 -AVX_INSTR pminsw, mmx2, 0, 0, 1 -AVX_INSTR pminsd, sse4, 0, 0, 1 -AVX_INSTR pminub, mmx2, 0, 0, 1 -AVX_INSTR pminuw, sse4, 0, 0, 1 -AVX_INSTR pminud, sse4, 0, 0, 1 -AVX_INSTR pmovmskb, mmx2 -AVX_INSTR pmovsxbw, sse4 -AVX_INSTR pmovsxbd, sse4 -AVX_INSTR pmovsxbq, sse4 -AVX_INSTR pmovsxwd, sse4 -AVX_INSTR pmovsxwq, sse4 -AVX_INSTR pmovsxdq, sse4 -AVX_INSTR pmovzxbw, sse4 -AVX_INSTR pmovzxbd, sse4 -AVX_INSTR pmovzxbq, sse4 -AVX_INSTR pmovzxwd, sse4 -AVX_INSTR pmovzxwq, sse4 -AVX_INSTR pmovzxdq, sse4 -AVX_INSTR pmuldq, sse4, 0, 0, 1 -AVX_INSTR pmulhrsw, ssse3, 0, 0, 1 -AVX_INSTR pmulhuw, mmx2, 0, 0, 1 -AVX_INSTR pmulhw, mmx, 0, 0, 1 -AVX_INSTR pmullw, mmx, 0, 0, 1 -AVX_INSTR pmulld, sse4, 0, 0, 1 -AVX_INSTR pmuludq, sse2, 0, 0, 1 -AVX_INSTR por, mmx, 0, 0, 1 -AVX_INSTR psadbw, mmx2, 0, 0, 1 -AVX_INSTR pshufb, ssse3, 0, 0, 0 -AVX_INSTR pshufd, sse2 -AVX_INSTR pshufhw, sse2 -AVX_INSTR pshuflw, sse2 -AVX_INSTR psignb, ssse3, 0, 0, 0 -AVX_INSTR psignw, ssse3, 0, 0, 0 -AVX_INSTR psignd, ssse3, 0, 0, 0 -AVX_INSTR psllw, mmx, 0, 0, 0 -AVX_INSTR pslld, mmx, 0, 0, 0 -AVX_INSTR psllq, mmx, 0, 0, 0 -AVX_INSTR pslldq, sse2, 0, 0, 0 -AVX_INSTR psraw, mmx, 0, 0, 0 -AVX_INSTR psrad, mmx, 0, 0, 0 -AVX_INSTR psrlw, mmx, 0, 0, 0 -AVX_INSTR psrld, mmx, 0, 0, 0 -AVX_INSTR psrlq, mmx, 0, 0, 0 -AVX_INSTR psrldq, sse2, 0, 0, 0 -AVX_INSTR psubb, mmx, 0, 0, 0 -AVX_INSTR psubw, mmx, 0, 0, 0 -AVX_INSTR psubd, mmx, 0, 0, 0 -AVX_INSTR psubq, sse2, 0, 0, 0 -AVX_INSTR psubsb, mmx, 0, 0, 0 -AVX_INSTR psubsw, mmx, 0, 0, 0 -AVX_INSTR psubusb, mmx, 0, 0, 0 -AVX_INSTR psubusw, mmx, 0, 0, 0 -AVX_INSTR ptest, sse4 -AVX_INSTR punpckhbw, mmx, 0, 0, 0 -AVX_INSTR punpckhwd, mmx, 0, 0, 0 -AVX_INSTR punpckhdq, mmx, 0, 0, 0 -AVX_INSTR punpckhqdq, sse2, 0, 0, 0 -AVX_INSTR punpcklbw, mmx, 0, 0, 0 -AVX_INSTR punpcklwd, mmx, 0, 0, 0 -AVX_INSTR punpckldq, mmx, 0, 0, 0 -AVX_INSTR punpcklqdq, sse2, 0, 0, 0 -AVX_INSTR pxor, mmx, 0, 0, 1 -AVX_INSTR rcpps, sse, 1, 0, 0 -AVX_INSTR rcpss, sse, 1, 0, 0 -AVX_INSTR roundpd, sse4 -AVX_INSTR roundps, sse4 -AVX_INSTR roundsd, sse4 -AVX_INSTR roundss, sse4 -AVX_INSTR rsqrtps, sse, 1, 0, 0 -AVX_INSTR rsqrtss, sse, 1, 0, 0 -AVX_INSTR shufpd, sse2, 1, 1, 0 -AVX_INSTR shufps, sse, 1, 1, 0 -AVX_INSTR sqrtpd, sse2, 1, 0, 0 -AVX_INSTR sqrtps, sse, 1, 0, 0 -AVX_INSTR sqrtsd, sse2, 1, 0, 0 -AVX_INSTR sqrtss, sse, 1, 0, 0 -AVX_INSTR stmxcsr, sse -AVX_INSTR subpd, sse2, 1, 0, 0 -AVX_INSTR subps, sse, 1, 0, 0 -AVX_INSTR subsd, sse2, 1, 0, 0 -AVX_INSTR subss, sse, 1, 0, 0 -AVX_INSTR ucomisd, sse2 -AVX_INSTR ucomiss, sse -AVX_INSTR unpckhpd, sse2, 1, 0, 0 -AVX_INSTR unpckhps, sse, 1, 0, 0 -AVX_INSTR unpcklpd, sse2, 1, 0, 0 -AVX_INSTR unpcklps, sse, 1, 0, 0 -AVX_INSTR xorpd, sse2, 1, 0, 1 -AVX_INSTR xorps, sse, 1, 0, 1 - -; 3DNow instructions, for sharing code between AVX, SSE and 3DN -AVX_INSTR pfadd, 3dnow, 1, 0, 1 -AVX_INSTR pfsub, 3dnow, 1, 0, 0 -AVX_INSTR pfmul, 3dnow, 1, 0, 1 - -; base-4 constants for shuffles -%assign i 0 -%rep 256 - %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3) - %if j < 10 - CAT_XDEFINE q000, j, i - %elif j < 100 - CAT_XDEFINE q00, j, i - %elif j < 1000 - CAT_XDEFINE q0, j, i - %else - CAT_XDEFINE q, j, i - %endif - %assign i i+1 -%endrep -%undef i -%undef j - -%macro FMA_INSTR 3 - %macro %1 4-7 %1, %2, %3 - %if cpuflag(xop) - v%5 %1, %2, %3, %4 - %elifnidn %1, %4 - %6 %1, %2, %3 - %7 %1, %4 - %else - %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported - %endif - %endmacro -%endmacro - -FMA_INSTR pmacsww, pmullw, paddw -FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation -FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation -FMA_INSTR pmadcswd, pmaddwd, paddd - -; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax. -; FMA3 is only possible if dst is the same as one of the src registers. -; Either src2 or src3 can be a memory operand. -%macro FMA4_INSTR 2-* - %push fma4_instr - %xdefine %$prefix %1 - %rep %0 - 1 - %macro %$prefix%2 4-6 %$prefix, %2 - %if notcpuflag(fma3) && notcpuflag(fma4) - %error use of ``%5%6'' fma instruction in cpuname function: current_function - %elif cpuflag(fma4) - v%5%6 %1, %2, %3, %4 - %elifidn %1, %2 - ; If %3 or %4 is a memory operand it needs to be encoded as the last operand. - %ifid %3 - v%{5}213%6 %2, %3, %4 - %else - v%{5}132%6 %2, %4, %3 - %endif - %elifidn %1, %3 - v%{5}213%6 %3, %2, %4 - %elifidn %1, %4 - v%{5}231%6 %4, %2, %3 - %else - %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported - %endif - %endmacro - %rotate 1 - %endrep - %pop -%endmacro - -FMA4_INSTR fmadd, pd, ps, sd, ss -FMA4_INSTR fmaddsub, pd, ps -FMA4_INSTR fmsub, pd, ps, sd, ss -FMA4_INSTR fmsubadd, pd, ps -FMA4_INSTR fnmadd, pd, ps, sd, ss -FMA4_INSTR fnmsub, pd, ps, sd, ss - -; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) -%ifdef __YASM_VER__ - %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 - %macro vpbroadcastq 2 - %if sizeof%1 == 16 - movddup %1, %2 - %else - vbroadcastsd %1, %2 - %endif - %endmacro - %endif -%endif diff --git a/thirdparty/libvpx/vp8/common/alloccommon.c b/thirdparty/libvpx/vp8/common/alloccommon.c deleted file mode 100644 index 8dfd4ce203..0000000000 --- a/thirdparty/libvpx/vp8/common/alloccommon.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "alloccommon.h" -#include "blockd.h" -#include "vpx_mem/vpx_mem.h" -#include "onyxc_int.h" -#include "findnearmv.h" -#include "entropymode.h" -#include "systemdependent.h" - -void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) -{ - int i; - for (i = 0; i < NUM_YV12_BUFFERS; i++) - vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); - - vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); -#if CONFIG_POSTPROC - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); - if (oci->post_proc_buffer_int_used) - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); - - vpx_free(oci->pp_limits_buffer); - oci->pp_limits_buffer = NULL; -#endif - - vpx_free(oci->above_context); - vpx_free(oci->mip); -#if CONFIG_ERROR_CONCEALMENT - vpx_free(oci->prev_mip); - oci->prev_mip = NULL; -#endif - - oci->above_context = NULL; - oci->mip = NULL; -} - -int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) -{ - int i; - - vp8_de_alloc_frame_buffers(oci); - - /* our internal buffers are always multiples of 16 */ - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if ((height & 0xf) != 0) - height += 16 - (height & 0xf); - - - for (i = 0; i < NUM_YV12_BUFFERS; i++) - { - oci->fb_idx_ref_cnt[i] = 0; - oci->yv12_fb[i].flags = 0; - if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) - goto allocation_fail; - } - - oci->new_fb_idx = 0; - oci->lst_fb_idx = 1; - oci->gld_fb_idx = 2; - oci->alt_fb_idx = 3; - - oci->fb_idx_ref_cnt[0] = 1; - oci->fb_idx_ref_cnt[1] = 1; - oci->fb_idx_ref_cnt[2] = 1; - oci->fb_idx_ref_cnt[3] = 1; - - if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) - goto allocation_fail; - - oci->mb_rows = height >> 4; - oci->mb_cols = width >> 4; - oci->MBs = oci->mb_rows * oci->mb_cols; - oci->mode_info_stride = oci->mb_cols + 1; - oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); - - if (!oci->mip) - goto allocation_fail; - - oci->mi = oci->mip + oci->mode_info_stride + 1; - - /* Allocation of previous mode info will be done in vp8_decode_frame() - * as it is a decoder only data */ - - oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); - - if (!oci->above_context) - goto allocation_fail; - -#if CONFIG_POSTPROC - if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) - goto allocation_fail; - - oci->post_proc_buffer_int_used = 0; - memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); - memset(oci->post_proc_buffer.buffer_alloc, 128, - oci->post_proc_buffer.frame_size); - - /* Allocate buffer to store post-processing filter coefficients. - * - * Note: Round up mb_cols to support SIMD reads - */ - oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); - if (!oci->pp_limits_buffer) - goto allocation_fail; -#endif - - return 0; - -allocation_fail: - vp8_de_alloc_frame_buffers(oci); - return 1; -} - -void vp8_setup_version(VP8_COMMON *cm) -{ - switch (cm->version) - { - case 0: - cm->no_lpf = 0; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 0; - cm->full_pixel = 0; - break; - case 1: - cm->no_lpf = 0; - cm->filter_type = SIMPLE_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 0; - break; - case 2: - cm->no_lpf = 1; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 0; - break; - case 3: - cm->no_lpf = 1; - cm->filter_type = SIMPLE_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 1; - break; - default: - /*4,5,6,7 are reserved for future use*/ - cm->no_lpf = 0; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 0; - cm->full_pixel = 0; - break; - } -} -void vp8_create_common(VP8_COMMON *oci) -{ - vp8_machine_specific_config(oci); - - vp8_init_mbmode_probs(oci); - vp8_default_bmode_probs(oci->fc.bmode_prob); - - oci->mb_no_coeff_skip = 1; - oci->no_lpf = 0; - oci->filter_type = NORMAL_LOOPFILTER; - oci->use_bilinear_mc_filter = 0; - oci->full_pixel = 0; - oci->multi_token_partition = ONE_PARTITION; - oci->clamp_type = RECON_CLAMP_REQUIRED; - - /* Initialize reference frame sign bias structure to defaults */ - memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); - - /* Default disable buffer to buffer copying */ - oci->copy_buffer_to_gf = 0; - oci->copy_buffer_to_arf = 0; -} - -void vp8_remove_common(VP8_COMMON *oci) -{ - vp8_de_alloc_frame_buffers(oci); -} diff --git a/thirdparty/libvpx/vp8/common/alloccommon.h b/thirdparty/libvpx/vp8/common/alloccommon.h deleted file mode 100644 index 93e99d76b1..0000000000 --- a/thirdparty/libvpx/vp8/common/alloccommon.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ALLOCCOMMON_H_ -#define VP8_COMMON_ALLOCCOMMON_H_ - -#include "onyxc_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_create_common(VP8_COMMON *oci); -void vp8_remove_common(VP8_COMMON *oci); -void vp8_de_alloc_frame_buffers(VP8_COMMON *oci); -int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height); -void vp8_setup_version(VP8_COMMON *oci); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c b/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c deleted file mode 100644 index 5840c2bbaa..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vp8/common/loopfilter.h" -#include "vp8/common/onyxc_int.h" - -#define prototype_loopfilter(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ - const unsigned char *limit, const unsigned char *thresh, int count) - -#if HAVE_MEDIA -extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6); -extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6); -extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6); -extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6); -#endif - -#if HAVE_NEON -typedef void loopfilter_y_neon(unsigned char *src, int pitch, - unsigned char blimit, unsigned char limit, unsigned char thresh); -typedef void loopfilter_uv_neon(unsigned char *u, int pitch, - unsigned char blimit, unsigned char limit, unsigned char thresh, - unsigned char *v); - -extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; -extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; - -extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; -extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; -#endif - -#if HAVE_MEDIA -/* ARMV6/MEDIA loopfilter functions*/ -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit); -} - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit); -} -#endif - -#if HAVE_NEON -/* NEON loopfilter functions */ -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char mblim = *lfi->mblim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); -} - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char mblim = *lfi->mblim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); -} - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char blim = *lfi->blim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr); - vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr); - vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride); -} - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char blim = *lfi->blim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr); - vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr); - vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr); - - if (u_ptr) - vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4); -} -#endif diff --git a/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c deleted file mode 100644 index bb6ea76ba4..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const uint8_t bifilter4_coeff[8][2] = { - {128, 0}, - {112, 16}, - { 96, 32}, - { 80, 48}, - { 64, 64}, - { 48, 80}, - { 32, 96}, - { 16, 112} -}; - -void vp8_bilinear_predict8x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8; - uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16; - uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; - - if (xoffset == 0) { // skip_1stpass_filter - d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d26u8 = vld1_u8(src_ptr); - } else { - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q5u8 = vld1q_u8(src_ptr); - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - q10u16 = vmlal_u8(q10u16, d11u8, d1u8); - - d22u8 = vqrshrn_n_u16(q6u16, 7); - d23u8 = vqrshrn_n_u16(q7u16, 7); - d24u8 = vqrshrn_n_u16(q8u16, 7); - d25u8 = vqrshrn_n_u16(q9u16, 7); - d26u8 = vqrshrn_n_u16(q10u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d25u8); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d22u8, d0u8); - q2u16 = vmull_u8(d23u8, d0u8); - q3u16 = vmull_u8(d24u8, d0u8); - q4u16 = vmull_u8(d25u8, d0u8); - - q1u16 = vmlal_u8(q1u16, d23u8, d1u8); - q2u16 = vmlal_u8(q2u16, d24u8, d1u8); - q3u16 = vmlal_u8(q3u16, d25u8, d1u8); - q4u16 = vmlal_u8(q4u16, d26u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - - vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d5u8); - } - return; -} - -void vp8_bilinear_predict8x8_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8; - uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16; - uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; - - if (xoffset == 0) { // skip_1stpass_filter - d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d30u8 = vld1_u8(src_ptr); - } else { - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - - d22u8 = vqrshrn_n_u16(q6u16, 7); - d23u8 = vqrshrn_n_u16(q7u16, 7); - d24u8 = vqrshrn_n_u16(q8u16, 7); - d25u8 = vqrshrn_n_u16(q9u16, 7); - - // first_pass filtering on the rest 5-line data - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q5u8 = vld1q_u8(src_ptr); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - q10u16 = vmlal_u8(q10u16, d11u8, d1u8); - - d26u8 = vqrshrn_n_u16(q6u16, 7); - d27u8 = vqrshrn_n_u16(q7u16, 7); - d28u8 = vqrshrn_n_u16(q8u16, 7); - d29u8 = vqrshrn_n_u16(q9u16, 7); - d30u8 = vqrshrn_n_u16(q10u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d29u8); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d22u8, d0u8); - q2u16 = vmull_u8(d23u8, d0u8); - q3u16 = vmull_u8(d24u8, d0u8); - q4u16 = vmull_u8(d25u8, d0u8); - q5u16 = vmull_u8(d26u8, d0u8); - q6u16 = vmull_u8(d27u8, d0u8); - q7u16 = vmull_u8(d28u8, d0u8); - q8u16 = vmull_u8(d29u8, d0u8); - - q1u16 = vmlal_u8(q1u16, d23u8, d1u8); - q2u16 = vmlal_u8(q2u16, d24u8, d1u8); - q3u16 = vmlal_u8(q3u16, d25u8, d1u8); - q4u16 = vmlal_u8(q4u16, d26u8, d1u8); - q5u16 = vmlal_u8(q5u16, d27u8, d1u8); - q6u16 = vmlal_u8(q6u16, d28u8, d1u8); - q7u16 = vmlal_u8(q7u16, d29u8, d1u8); - q8u16 = vmlal_u8(q8u16, d30u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d9u8); - } - return; -} - -void vp8_bilinear_predict16x16_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - int i; - unsigned char tmp[272]; - unsigned char *tmpp; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; - uint8x8_t d19u8, d20u8, d21u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; - uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; - uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; - - if (xoffset == 0) { // secondpass_bfilter16x16_only - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q11u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; - } - return; - } - - if (yoffset == 0) { // firstpass_bfilter16x16_only - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - for (i = 4; i > 0 ; i--) { - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 =vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch; - } - return; - } - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - // First Pass: output_height lines x output_width columns (17x16) - tmpp = tmp; - for (i = 3; i > 0; i--) { - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 = vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16; - } - - // First-pass filtering for rest 5 lines - d14u8 = vld1_u8(src_ptr); - d15u8 = vld1_u8(src_ptr + 8); - d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q9u16 = vmull_u8(d2u8, d0u8); - q10u16 = vmull_u8(d3u8, d0u8); - q11u16 = vmull_u8(d5u8, d0u8); - q12u16 = vmull_u8(d6u8, d0u8); - q13u16 = vmull_u8(d8u8, d0u8); - q14u16 = vmull_u8(d9u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - - q9u16 = vmlal_u8(q9u16, d2u8, d1u8); - q11u16 = vmlal_u8(q11u16, d5u8, d1u8); - q13u16 = vmlal_u8(q13u16, d8u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - - q10u16 = vmlal_u8(q10u16, d3u8, d1u8); - q12u16 = vmlal_u8(q12u16, d6u8, d1u8); - q14u16 = vmlal_u8(q14u16, d9u8, d1u8); - - q1u16 = vmull_u8(d11u8, d0u8); - q2u16 = vmull_u8(d12u8, d0u8); - q3u16 = vmull_u8(d14u8, d0u8); - q4u16 = vmull_u8(d15u8, d0u8); - - d11u8 = vext_u8(d11u8, d12u8, 1); - d14u8 = vext_u8(d14u8, d15u8, 1); - - q1u16 = vmlal_u8(q1u16, d11u8, d1u8); - q3u16 = vmlal_u8(q3u16, d14u8, d1u8); - - d12u8 = vext_u8(d12u8, d13u8, 1); - d15u8 = vext_u8(d15u8, d16u8, 1); - - q2u16 = vmlal_u8(q2u16, d12u8, d1u8); - q4u16 = vmlal_u8(q4u16, d15u8, d1u8); - - d10u8 = vqrshrn_n_u16(q9u16, 7); - d11u8 = vqrshrn_n_u16(q10u16, 7); - d12u8 = vqrshrn_n_u16(q11u16, 7); - d13u8 = vqrshrn_n_u16(q12u16, 7); - d14u8 = vqrshrn_n_u16(q13u16, 7); - d15u8 = vqrshrn_n_u16(q14u16, 7); - d16u8 = vqrshrn_n_u16(q1u16, 7); - d17u8 = vqrshrn_n_u16(q2u16, 7); - d18u8 = vqrshrn_n_u16(q3u16, 7); - d19u8 = vqrshrn_n_u16(q4u16, 7); - - q5u8 = vcombine_u8(d10u8, d11u8); - q6u8 = vcombine_u8(d12u8, d13u8); - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - - vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); - - // secondpass_filter - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - tmpp = tmp; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(tmpp); tmpp += 16; - q13u8 = vld1q_u8(tmpp); tmpp += 16; - q14u8 = vld1q_u8(tmpp); tmpp += 16; - q15u8 = vld1q_u8(tmpp); tmpp += 16; - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c deleted file mode 100644 index deced115c1..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_copy_mem8x4_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - uint8x8_t vtmp; - int r; - - for (r = 0; r < 4; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - -void vp8_copy_mem8x8_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - uint8x8_t vtmp; - int r; - - for (r = 0; r < 8; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - -void vp8_copy_mem16x16_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - int r; - uint8x16_t qtmp; - - for (r = 0; r < 16; r++) { - qtmp = vld1q_u8(src); - vst1q_u8(dst, qtmp); - src += src_stride; - dst += dst_stride; - } -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c deleted file mode 100644 index ad5f41d7de..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_dc_only_idct_add_neon( - int16_t input_dc, - unsigned char *pred_ptr, - int pred_stride, - unsigned char *dst_ptr, - int dst_stride) { - int i; - uint16_t a1 = ((input_dc + 4) >> 3); - uint32x2_t d2u32 = vdup_n_u32(0); - uint8x8_t d2u8; - uint16x8_t q1u16; - uint16x8_t qAdd; - - qAdd = vdupq_n_u16(a1); - - for (i = 0; i < 2; i++) { - d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); - pred_ptr += pred_stride; - d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); - pred_ptr += pred_stride; - - q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); - dst_ptr += dst_stride; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); - dst_ptr += dst_stride; - } -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c deleted file mode 100644 index 58e11922c7..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 35468; - -void vp8_dequant_idct_add_neon( - int16_t *input, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int32x2_t d14, d15; - int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; - int16x8_t q1, q2, q3, q4, q5, q6; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x2x2_t d2tmp0, d2tmp1; - int16x4x2_t d2tmp2, d2tmp3; - - d14 = d15 = vdup_n_s32(0); - - // load input - q3 = vld1q_s16(input); - vst1q_s16(input, qEmpty); - input += 8; - q4 = vld1q_s16(input); - vst1q_s16(input, qEmpty); - - // load dq - q5 = vld1q_s16(dq); - dq += 8; - q6 = vld1q_s16(dq); - - // load src from dst - dst0 = dst; - d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); - dst0 += stride; - d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); - dst0 += stride; - d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); - dst0 += stride; - d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); - - q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), - vreinterpretq_u16_s16(q5))); - q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), - vreinterpretq_u16_s16(q6))); - - d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); - d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); - - q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); - - q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); - q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); - - q3 = vshrq_n_s16(q3, 1); - q4 = vshrq_n_s16(q4, 1); - - q3 = vqaddq_s16(q3, q2); - q4 = vqaddq_s16(q4, q2); - - d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); - d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), - vreinterpret_s16_s32(d2tmp1.val[0])); - d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), - vreinterpret_s16_s32(d2tmp1.val[1])); - - // loop 2 - q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); - - q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); - q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); - - d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]); - d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]); - - q3 = vshrq_n_s16(q3, 1); - q4 = vshrq_n_s16(q4, 1); - - q3 = vqaddq_s16(q3, q2); - q4 = vqaddq_s16(q4, q2); - - d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); - d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2 = vrshr_n_s16(d2, 3); - d3 = vrshr_n_s16(d3, 3); - d4 = vrshr_n_s16(d4, 3); - d5 = vrshr_n_s16(d5, 3); - - d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), - vreinterpret_s16_s32(d2tmp1.val[0])); - d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), - vreinterpret_s16_s32(d2tmp1.val[1])); - - q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); - q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); - - q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), - vreinterpret_u8_s32(d14))); - q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2), - vreinterpret_u8_s32(d15))); - - d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); - d15 = vreinterpret_s32_u8(vqmovun_s16(q2)); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d14, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d14, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d15, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d15, 1); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c deleted file mode 100644 index 54e709dd3c..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp8/common/blockd.h" - -void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { - int16x8x2_t qQ, qDQC, qDQ; - - qQ = vld2q_s16(d->qcoeff); - qDQC = vld2q_s16(DQC); - - qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); - qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); - - vst2q_s16(d->dqcoeff, qDQ); -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c deleted file mode 100644 index fb327a7260..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" - -/* place these declarations here because we don't want to maintain them - * outside of this scope - */ -void idct_dequant_full_2x_neon(short *q, short *dq, - unsigned char *dst, int stride); -void idct_dequant_0_2x_neon(short *q, short dq, - unsigned char *dst, int stride); - - -void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, - unsigned char *dst, - int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (((short *)(eobs))[0]) - { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dst, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dst, stride); - } - - if (((short *)(eobs))[1]) - { - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon (q+32, dq, dst+8, stride); - else - idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride); - } - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, - int stride, char *eobs) -{ - if (((short *)(eobs))[0]) - { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstu, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstu, stride); - } - - q += 32; - dstu += 4*stride; - - if (((short *)(eobs))[1]) - { - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstu, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstu, stride); - } - - q += 32; - - if (((short *)(eobs))[2]) - { - if (((short *)eobs)[2] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstv, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstv, stride); - } - - q += 32; - dstv += 4*stride; - - if (((short *)(eobs))[3]) - { - if (((short *)eobs)[3] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstv, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstv, stride); - } -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c deleted file mode 100644 index e6f862fa89..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void idct_dequant_0_2x_neon( - int16_t *q, - int16_t dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int i, a0, a1; - int16x8x2_t q2Add; - int32x2_t d2s32 = vdup_n_s32(0), - d4s32 = vdup_n_s32(0); - uint8x8_t d2u8, d4u8; - uint16x8_t q1u16, q2u16; - - a0 = ((q[0] * dq) + 4) >> 3; - a1 = ((q[16] * dq) + 4) >> 3; - q[0] = q[16] = 0; - q2Add.val[0] = vdupq_n_s16((int16_t)a0); - q2Add.val[1] = vdupq_n_s16((int16_t)a1); - - for (i = 0; i < 2; i++, dst += 4) { - dst0 = dst; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); - dst0 += stride; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d2s32)); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d4s32)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - - d2s32 = vreinterpret_s32_u8(d2u8); - d4s32 = vreinterpret_s32_u8(d4u8); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d2s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d2s32, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 1); - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c deleted file mode 100644 index a60ed46b76..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 17734; -// because the lowest bit in 0x8a8c is 0, we can pre-shift this - -void idct_dequant_full_2x_neon( - int16_t *q, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0, *dst1; - int32x2_t d28, d29, d30, d31; - int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x4x2_t q2tmp0, q2tmp1; - int16x8x2_t q2tmp2, q2tmp3; - int16x4_t dLow0, dLow1, dHigh0, dHigh1; - - d28 = d29 = d30 = d31 = vdup_n_s32(0); - - // load dq - q0 = vld1q_s16(dq); - dq += 8; - q1 = vld1q_s16(dq); - - // load q - q2 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q3 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q4 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q5 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - - // load src from dst - dst0 = dst; - dst1 = dst + 4; - d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); - dst0 += stride; - d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); - dst1 += stride; - d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); - dst0 += stride; - d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); - dst1 += stride; - - d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); - dst0 += stride; - d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); - dst1 += stride; - d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); - d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); - - q2 = vmulq_s16(q2, q0); - q3 = vmulq_s16(q3, q1); - q4 = vmulq_s16(q4, q0); - q5 = vmulq_s16(q5, q1); - - // vswp - dLow0 = vget_low_s16(q2); - dHigh0 = vget_high_s16(q2); - dLow1 = vget_low_s16(q4); - dHigh1 = vget_high_s16(q4); - q2 = vcombine_s16(dLow0, dLow1); - q4 = vcombine_s16(dHigh0, dHigh1); - - dLow0 = vget_low_s16(q3); - dHigh0 = vget_high_s16(q3); - dLow1 = vget_low_s16(q5); - dHigh1 = vget_high_s16(q5); - q3 = vcombine_s16(dLow0, dLow1); - q5 = vcombine_s16(dHigh0, dHigh1); - - q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); - q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); - q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); - q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); - - q10 = vqaddq_s16(q2, q3); - q11 = vqsubq_s16(q2, q3); - - q8 = vshrq_n_s16(q8, 1); - q9 = vshrq_n_s16(q9, 1); - - q4 = vqaddq_s16(q4, q8); - q5 = vqaddq_s16(q5, q9); - - q2 = vqsubq_s16(q6, q5); - q3 = vqaddq_s16(q7, q4); - - q4 = vqaddq_s16(q10, q3); - q5 = vqaddq_s16(q11, q2); - q6 = vqsubq_s16(q11, q2); - q7 = vqsubq_s16(q10, q3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - // loop 2 - q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); - q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); - q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); - q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); - - q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); - q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); - - q10 = vshrq_n_s16(q10, 1); - q11 = vshrq_n_s16(q11, 1); - - q10 = vqaddq_s16(q2tmp2.val[1], q10); - q11 = vqaddq_s16(q2tmp3.val[1], q11); - - q8 = vqsubq_s16(q8, q11); - q9 = vqaddq_s16(q9, q10); - - q4 = vqaddq_s16(q2, q9); - q5 = vqaddq_s16(q3, q8); - q6 = vqsubq_s16(q3, q8); - q7 = vqsubq_s16(q2, q9); - - q4 = vrshrq_n_s16(q4, 3); - q5 = vrshrq_n_s16(q5, 3); - q6 = vrshrq_n_s16(q6, 3); - q7 = vrshrq_n_s16(q7, 3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), - vreinterpret_u8_s32(d28))); - q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), - vreinterpret_u8_s32(d29))); - q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), - vreinterpret_u8_s32(d30))); - q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), - vreinterpret_u8_s32(d31))); - - d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); - d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); - d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); - d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); - - dst0 = dst; - dst1 = dst + 4; - vst1_lane_s32((int32_t *)dst0, d28, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d28, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d29, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d29, 1); - dst1 += stride; - - vst1_lane_s32((int32_t *)dst0, d30, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d30, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d31, 0); - vst1_lane_s32((int32_t *)dst1, d31, 1); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c deleted file mode 100644 index 6ea9dd712a..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vp8_short_inv_walsh4x4_neon( - int16_t *input, - int16_t *mb_dqcoeff) { - int16x8_t q0s16, q1s16, q2s16, q3s16; - int16x4_t d4s16, d5s16, d6s16, d7s16; - int16x4x2_t v2tmp0, v2tmp1; - int32x2x2_t v2tmp2, v2tmp3; - int16x8_t qAdd3; - - q0s16 = vld1q_s16(input); - q1s16 = vld1q_s16(input + 8); - - // 1st for loop - d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); - d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); - d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); - d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); - - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - q0s16 = vaddq_s16(q2s16, q3s16); - q1s16 = vsubq_s16(q2s16, q3s16); - - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), - vreinterpret_s32_s16(vget_low_s16(q1s16))); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)), - vreinterpret_s32_s16(vget_high_s16(q1s16))); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), - vreinterpret_s16_s32(v2tmp3.val[0])); - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), - vreinterpret_s16_s32(v2tmp3.val[1])); - - // 2nd for loop - d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); - d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); - d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); - d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - qAdd3 = vdupq_n_s16(3); - - q0s16 = vaddq_s16(q2s16, q3s16); - q1s16 = vsubq_s16(q2s16, q3s16); - - q0s16 = vaddq_s16(q0s16, qAdd3); - q1s16 = vaddq_s16(q1s16, qAdd3); - - q0s16 = vshrq_n_s16(q0s16, 3); - q1s16 = vshrq_n_s16(q1s16, 3); - - // store - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3); - mb_dqcoeff += 16; - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c deleted file mode 100644 index b25686ffb8..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vpx_config.h" - -static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( - unsigned char *s, - int p, - const unsigned char *blimit) { - uint8_t *sp; - uint8x16_t qblimit, q0u8; - uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8; - int16x8_t q2s16, q3s16, q13s16; - int8x8_t d8s8, d9s8; - int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8; - - qblimit = vdupq_n_u8(*blimit); - - sp = s - (p << 1); - q5u8 = vld1q_u8(sp); - sp += p; - q6u8 = vld1q_u8(sp); - sp += p; - q7u8 = vld1q_u8(sp); - sp += p; - q8u8 = vld1q_u8(sp); - - q15u8 = vabdq_u8(q6u8, q7u8); - q14u8 = vabdq_u8(q5u8, q8u8); - - q15u8 = vqaddq_u8(q15u8, q15u8); - q14u8 = vshrq_n_u8(q14u8, 1); - q0u8 = vdupq_n_u8(0x80); - q13s16 = vdupq_n_s16(3); - q15u8 = vqaddq_u8(q15u8, q14u8); - - q5u8 = veorq_u8(q5u8, q0u8); - q6u8 = veorq_u8(q6u8, q0u8); - q7u8 = veorq_u8(q7u8, q0u8); - q8u8 = veorq_u8(q8u8, q0u8); - - q15u8 = vcgeq_u8(qblimit, q15u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), - vget_low_s8(vreinterpretq_s8_u8(q6u8))); - q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)), - vget_high_s8(vreinterpretq_s8_u8(q6u8))); - - q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), - vreinterpretq_s8_u8(q8u8)); - - q2s16 = vmulq_s16(q2s16, q13s16); - q3s16 = vmulq_s16(q3s16, q13s16); - - q10u8 = vdupq_n_u8(3); - q9u8 = vdupq_n_u8(4); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); - q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8)); - - d8s8 = vqmovn_s16(q2s16); - d9s8 = vqmovn_s16(q3s16); - q4s8 = vcombine_s8(d8s8, d9s8); - - q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8)); - - q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8)); - q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q3s8 = vshrq_n_s8(q3s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8); - - q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - - vst1q_u8(s, q7u8); - s -= p; - vst1q_u8(s, q6u8); - return; -} - -void vp8_loop_filter_bhs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - return; -} - -void vp8_loop_filter_mbhs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c deleted file mode 100644 index 921bcad698..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vpx_config.h" -#include "vpx_ports/arm.h" - -#ifdef VPX_INCOMPATIBLE_GCC -static INLINE void write_2x4(unsigned char *dst, int pitch, - const uint8x8x2_t result) { - /* - * uint8x8x2_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - --- - * after vtrn_u8 - 00 10 02 12 | 04 14 06 16 - 01 11 03 13 | 05 15 07 17 - */ - const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], - result.val[1]); - const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]); - const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]); - vst1_lane_u16((uint16_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 2); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 2); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 3); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 3); -} - -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - const uint8x8x2_t result2) { - write_2x4(dst, pitch, result); - dst += pitch * 8; - write_2x4(dst, pitch, result2); -} -#else -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - const uint8x8x2_t result2) { - vst2_lane_u8(dst, result, 0); - dst += pitch; - vst2_lane_u8(dst, result, 1); - dst += pitch; - vst2_lane_u8(dst, result, 2); - dst += pitch; - vst2_lane_u8(dst, result, 3); - dst += pitch; - vst2_lane_u8(dst, result, 4); - dst += pitch; - vst2_lane_u8(dst, result, 5); - dst += pitch; - vst2_lane_u8(dst, result, 6); - dst += pitch; - vst2_lane_u8(dst, result, 7); - dst += pitch; - - vst2_lane_u8(dst, result2, 0); - dst += pitch; - vst2_lane_u8(dst, result2, 1); - dst += pitch; - vst2_lane_u8(dst, result2, 2); - dst += pitch; - vst2_lane_u8(dst, result2, 3); - dst += pitch; - vst2_lane_u8(dst, result2, 4); - dst += pitch; - vst2_lane_u8(dst, result2, 5); - dst += pitch; - vst2_lane_u8(dst, result2, 6); - dst += pitch; - vst2_lane_u8(dst, result2, 7); -} -#endif // VPX_INCOMPATIBLE_GCC - - -#ifdef VPX_INCOMPATIBLE_GCC -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch) { - uint8x8x4_t x; - const uint8x8_t a = vld1_u8(src); - const uint8x8_t b = vld1_u8(src + pitch * 1); - const uint8x8_t c = vld1_u8(src + pitch * 2); - const uint8x8_t d = vld1_u8(src + pitch * 3); - const uint8x8_t e = vld1_u8(src + pitch * 4); - const uint8x8_t f = vld1_u8(src + pitch * 5); - const uint8x8_t g = vld1_u8(src + pitch * 6); - const uint8x8_t h = vld1_u8(src + pitch * 7); - const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a), - vreinterpret_u32_u8(e)); - const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b), - vreinterpret_u32_u8(f)); - const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c), - vreinterpret_u32_u8(g)); - const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d), - vreinterpret_u32_u8(h)); - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]), - vreinterpret_u16_u32(r26_u32.val[0])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]), - vreinterpret_u16_u32(r37_u32.val[0])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - /* - * after vtrn_u32 - 00 01 02 03 | 40 41 42 43 - 10 11 12 13 | 50 51 52 53 - 20 21 22 23 | 60 61 62 63 - 30 31 32 33 | 70 71 72 73 - --- - * after vtrn_u16 - 00 01 20 21 | 40 41 60 61 - 02 03 22 23 | 42 43 62 63 - 10 11 30 31 | 50 51 70 71 - 12 13 32 33 | 52 52 72 73 - - 00 01 20 21 | 40 41 60 61 - 10 11 30 31 | 50 51 70 71 - 02 03 22 23 | 42 43 62 63 - 12 13 32 33 | 52 52 72 73 - --- - * after vtrn_u8 - 00 10 20 30 | 40 50 60 70 - 01 11 21 31 | 41 51 61 71 - 02 12 22 32 | 42 52 62 72 - 03 13 23 33 | 43 53 63 73 - */ - x.val[0] = r01_u8.val[0]; - x.val[1] = r01_u8.val[1]; - x.val[2] = r23_u8.val[0]; - x.val[3] = r23_u8.val[1]; - - return x; -} -#else -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch) { - uint8x8x4_t x; - x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0); - x = vld4_lane_u8(src, x, 0); - src += pitch; - x = vld4_lane_u8(src, x, 1); - src += pitch; - x = vld4_lane_u8(src, x, 2); - src += pitch; - x = vld4_lane_u8(src, x, 3); - src += pitch; - x = vld4_lane_u8(src, x, 4); - src += pitch; - x = vld4_lane_u8(src, x, 5); - src += pitch; - x = vld4_lane_u8(src, x, 6); - src += pitch; - x = vld4_lane_u8(src, x, 7); - return x; -} -#endif // VPX_INCOMPATIBLE_GCC - -static INLINE void vp8_loop_filter_simple_vertical_edge_neon( - unsigned char *s, - int p, - const unsigned char *blimit) { - unsigned char *src1; - uint8x16_t qblimit, q0u8; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; - int16x8_t q2s16, q13s16, q11s16; - int8x8_t d28s8, d29s8; - int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8; - uint8x8x4_t d0u8x4; // d6, d7, d8, d9 - uint8x8x4_t d1u8x4; // d10, d11, d12, d13 - uint8x8x2_t d2u8x2; // d12, d13 - uint8x8x2_t d3u8x2; // d14, d15 - - qblimit = vdupq_n_u8(*blimit); - - src1 = s - 2; - d0u8x4 = read_4x8(src1, p); - src1 += p * 8; - d1u8x4 = read_4x8(src1, p); - - q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10 - q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12 - q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11 - q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13 - - q15u8 = vabdq_u8(q5u8, q4u8); - q14u8 = vabdq_u8(q3u8, q6u8); - - q15u8 = vqaddq_u8(q15u8, q15u8); - q14u8 = vshrq_n_u8(q14u8, 1); - q0u8 = vdupq_n_u8(0x80); - q11s16 = vdupq_n_s16(3); - q15u8 = vqaddq_u8(q15u8, q14u8); - - q3u8 = veorq_u8(q3u8, q0u8); - q4u8 = veorq_u8(q4u8, q0u8); - q5u8 = veorq_u8(q5u8, q0u8); - q6u8 = veorq_u8(q6u8, q0u8); - - q15u8 = vcgeq_u8(qblimit, q15u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)), - vget_low_s8(vreinterpretq_s8_u8(q5u8))); - q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)), - vget_high_s8(vreinterpretq_s8_u8(q5u8))); - - q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), - vreinterpretq_s8_u8(q6u8)); - - q2s16 = vmulq_s16(q2s16, q11s16); - q13s16 = vmulq_s16(q13s16, q11s16); - - q11u8 = vdupq_n_u8(3); - q12u8 = vdupq_n_u8(4); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8)); - q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8)); - - d28s8 = vqmovn_s16(q2s16); - d29s8 = vqmovn_s16(q13s16); - q14s8 = vcombine_s8(d28s8, d29s8); - - q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8)); - - q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8)); - q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q14s8 = vshrq_n_s8(q3s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8); - - q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - - d2u8x2.val[0] = vget_low_u8(q6u8); // d12 - d2u8x2.val[1] = vget_low_u8(q7u8); // d14 - d3u8x2.val[0] = vget_high_u8(q6u8); // d13 - d3u8x2.val[1] = vget_high_u8(q7u8); // d15 - - src1 = s - 1; - write_2x8(src1, p, d2u8x2, d3u8x2); -} - -void vp8_loop_filter_bvs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - return; -} - -void vp8_loop_filter_mbvs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c deleted file mode 100644 index 5351f4be66..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c +++ /dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vpx_config.h" - -static INLINE void vp8_mbloop_filter_neon( - uint8x16_t qblimit, // mblimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p2 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q4r, // p1 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r, // q1 - uint8x16_t *q9r) { // q1 - uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8; - uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16; - int8x16_t q0s8, q12s8, q14s8, q15s8; - int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q1u8 = vabdq_u8(q9, q8); - q0u8 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q1u8 = vmaxq_u8(q1u8, q0u8); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q12u8 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q1u8); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - q1u8 = vabdq_u8(q5, q8); - q12u8 = vqaddq_u8(q12u8, q12u8); - - // vp8_filter() function - // convert to signed - q0u8 = vdupq_n_u8(0x80); - q9 = veorq_u8(q9, q0u8); - q8 = veorq_u8(q8, q0u8); - q7 = veorq_u8(q7, q0u8); - q6 = veorq_u8(q6, q0u8); - q5 = veorq_u8(q5, q0u8); - q4 = veorq_u8(q4, q0u8); - - q1u8 = vshrq_n_u8(q1u8, 1); - q12u8 = vqaddq_u8(q12u8, q1u8); - - q14u8 = vorrq_u8(q13u8, q14u8); - q12u8 = vcgeq_u8(qblimit, q12u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q11s16 = vdupq_n_s16(3); - q2s16 = vmulq_s16(q2s16, q11s16); - q13s16 = vmulq_s16(q13s16, q11s16); - - q15u8 = vandq_u8(q15u8, q12u8); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8)); - - q12u8 = vdupq_n_u8(3); - q11u8 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2 = vqmovn_s16(q2s16); - d3 = vqmovn_s16(q13s16); - q1s8 = vcombine_s8(d2, d3); - q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8)); - q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8)); - q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q13s8 = vshrq_n_s8(q13s8, 3); - - q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8); - q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8); - - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63); - d5 = vdup_n_s8(9); - d4 = vdup_n_s8(18); - - q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5); - q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5); - d5 = vdup_n_s8(27); - q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4); - q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4); - q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5); - q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5); - - d0 = vqshrn_n_s16(q0s16 , 7); - d1 = vqshrn_n_s16(q11s16, 7); - d24 = vqshrn_n_s16(q12s16, 7); - d25 = vqshrn_n_s16(q13s16, 7); - d28 = vqshrn_n_s16(q14s16, 7); - d29 = vqshrn_n_s16(q15s16, 7); - - q0s8 = vcombine_s8(d0, d1); - q12s8 = vcombine_s8(d24, d25); - q14s8 = vcombine_s8(d28, d29); - - q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8); - q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8); - q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8); - q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8); - q15s8 = vqsubq_s8((q7s8), q14s8); - q14s8 = vqaddq_s8((q6s8), q14s8); - - q1u8 = vdupq_n_u8(0x80); - *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8); - *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8); - *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8); - *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8); - *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8); - *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8); - return; -} - -void vp8_mbloop_filter_horizontal_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - src -= (pitch << 2); - - q3 = vld1q_u8(src); - src += pitch; - q4 = vld1q_u8(src); - src += pitch; - q5 = vld1q_u8(src); - src += pitch; - q6 = vld1q_u8(src); - src += pitch; - q7 = vld1q_u8(src); - src += pitch; - q8 = vld1q_u8(src); - src += pitch; - q9 = vld1q_u8(src); - src += pitch; - q10 = vld1q_u8(src); - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - src -= (pitch * 6); - vst1q_u8(src, q4); - src += pitch; - vst1q_u8(src, q5); - src += pitch; - vst1q_u8(src, q6); - src += pitch; - vst1q_u8(src, q7); - src += pitch; - vst1q_u8(src, q8); - src += pitch; - vst1q_u8(src, q9); - return; -} - -void vp8_mbloop_filter_horizontal_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - u -= (pitch << 2); - v -= (pitch << 2); - - d6 = vld1_u8(u); - u += pitch; - d7 = vld1_u8(v); - v += pitch; - d8 = vld1_u8(u); - u += pitch; - d9 = vld1_u8(v); - v += pitch; - d10 = vld1_u8(u); - u += pitch; - d11 = vld1_u8(v); - v += pitch; - d12 = vld1_u8(u); - u += pitch; - d13 = vld1_u8(v); - v += pitch; - d14 = vld1_u8(u); - u += pitch; - d15 = vld1_u8(v); - v += pitch; - d16 = vld1_u8(u); - u += pitch; - d17 = vld1_u8(v); - v += pitch; - d18 = vld1_u8(u); - u += pitch; - d19 = vld1_u8(v); - v += pitch; - d20 = vld1_u8(u); - d21 = vld1_u8(v); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - u -= (pitch * 6); - v -= (pitch * 6); - vst1_u8(u, vget_low_u8(q4)); - u += pitch; - vst1_u8(v, vget_high_u8(q4)); - v += pitch; - vst1_u8(u, vget_low_u8(q5)); - u += pitch; - vst1_u8(v, vget_high_u8(q5)); - v += pitch; - vst1_u8(u, vget_low_u8(q6)); - u += pitch; - vst1_u8(v, vget_high_u8(q6)); - v += pitch; - vst1_u8(u, vget_low_u8(q7)); - u += pitch; - vst1_u8(v, vget_high_u8(q7)); - v += pitch; - vst1_u8(u, vget_low_u8(q8)); - u += pitch; - vst1_u8(v, vget_high_u8(q8)); - v += pitch; - vst1_u8(u, vget_low_u8(q9)); - vst1_u8(v, vget_high_u8(q9)); - return; -} - -void vp8_mbloop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - unsigned char *s1, *s2; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - s1 = src - 4; - s2 = s1 + 8 * pitch; - d6 = vld1_u8(s1); - s1 += pitch; - d7 = vld1_u8(s2); - s2 += pitch; - d8 = vld1_u8(s1); - s1 += pitch; - d9 = vld1_u8(s2); - s2 += pitch; - d10 = vld1_u8(s1); - s1 += pitch; - d11 = vld1_u8(s2); - s2 += pitch; - d12 = vld1_u8(s1); - s1 += pitch; - d13 = vld1_u8(s2); - s2 += pitch; - d14 = vld1_u8(s1); - s1 += pitch; - d15 = vld1_u8(s2); - s2 += pitch; - d16 = vld1_u8(s1); - s1 += pitch; - d17 = vld1_u8(s2); - s2 += pitch; - d18 = vld1_u8(s1); - s1 += pitch; - d19 = vld1_u8(s2); - s2 += pitch; - d20 = vld1_u8(s1); - d21 = vld1_u8(s2); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - s1 -= 7 * pitch; - s2 -= 7 * pitch; - - vst1_u8(s1, vget_low_u8(q3)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q3)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q4)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q4)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q5)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q5)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q6)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q6)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q7)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q7)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q8)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q8)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q9)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q9)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q10)); - vst1_u8(s2, vget_high_u8(q10)); - return; -} - -void vp8_mbloop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - unsigned char *us, *ud; - unsigned char *vs, *vd; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - us = u - 4; - vs = v - 4; - d6 = vld1_u8(us); - us += pitch; - d7 = vld1_u8(vs); - vs += pitch; - d8 = vld1_u8(us); - us += pitch; - d9 = vld1_u8(vs); - vs += pitch; - d10 = vld1_u8(us); - us += pitch; - d11 = vld1_u8(vs); - vs += pitch; - d12 = vld1_u8(us); - us += pitch; - d13 = vld1_u8(vs); - vs += pitch; - d14 = vld1_u8(us); - us += pitch; - d15 = vld1_u8(vs); - vs += pitch; - d16 = vld1_u8(us); - us += pitch; - d17 = vld1_u8(vs); - vs += pitch; - d18 = vld1_u8(us); - us += pitch; - d19 = vld1_u8(vs); - vs += pitch; - d20 = vld1_u8(us); - d21 = vld1_u8(vs); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - ud = u - 4; - vst1_u8(ud, vget_low_u8(q3)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q4)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q5)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q6)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q7)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q8)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q9)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q10)); - - vd = v - 4; - vst1_u8(vd, vget_high_u8(q3)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q4)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q5)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q6)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q7)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q8)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q9)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q10)); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c deleted file mode 100644 index 373afa6ed3..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 35468; - -void vp8_short_idct4x4llm_neon( - int16_t *input, - unsigned char *pred_ptr, - int pred_stride, - unsigned char *dst_ptr, - int dst_stride) { - int i; - uint32x2_t d6u32 = vdup_n_u32(0); - uint8x8_t d1u8; - int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; - uint16x8_t q1u16; - int16x8_t q1s16, q2s16, q3s16, q4s16; - int32x2x2_t v2tmp0, v2tmp1; - int16x4x2_t v2tmp2, v2tmp3; - - d2 = vld1_s16(input); - d3 = vld1_s16(input + 4); - d4 = vld1_s16(input + 8); - d5 = vld1_s16(input + 12); - - // 1st for loop - q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here - q2s16 = vcombine_s16(d3, d5); - - q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); - q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); - - d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 - d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 - - q3s16 = vshrq_n_s16(q3s16, 1); - q4s16 = vshrq_n_s16(q4s16, 1); - - q3s16 = vqaddq_s16(q3s16, q2s16); - q4s16 = vqaddq_s16(q4s16, q2s16); - - d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 - d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), - vreinterpret_s16_s32(v2tmp1.val[0])); - v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), - vreinterpret_s16_s32(v2tmp1.val[1])); - - // 2nd for loop - q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]); - q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]); - - q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); - q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); - - d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 - d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 - - q3s16 = vshrq_n_s16(q3s16, 1); - q4s16 = vshrq_n_s16(q4s16, 1); - - q3s16 = vqaddq_s16(q3s16, q2s16); - q4s16 = vqaddq_s16(q4s16, q2s16); - - d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 - d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2 = vrshr_n_s16(d2, 3); - d3 = vrshr_n_s16(d3, 3); - d4 = vrshr_n_s16(d4, 3); - d5 = vrshr_n_s16(d5, 3); - - v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), - vreinterpret_s16_s32(v2tmp1.val[0])); - v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), - vreinterpret_s16_s32(v2tmp1.val[1])); - - q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]); - q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]); - - // dc_only_idct_add - for (i = 0; i < 2; i++, q1s16 = q2s16) { - d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0); - pred_ptr += pred_stride; - d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1); - pred_ptr += pred_stride; - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), - vreinterpret_u8_u32(d6u32)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0); - dst_ptr += dst_stride; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1); - dst_ptr += dst_stride; - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c deleted file mode 100644 index 49d8d221fc..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c +++ /dev/null @@ -1,1377 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "vpx_ports/mem.h" - -static const int8_t vp8_sub_pel_filters[8][8] = { - {0, 0, 128, 0, 0, 0, 0, 0}, /* note that 1/8 pel positionyys are */ - {0, -6, 123, 12, -1, 0, 0, 0}, /* just as per alpha -0.5 bicubic */ - {2, -11, 108, 36, -8, 1, 0, 0}, /* New 1/4 pel 6 tap filter */ - {0, -9, 93, 50, -6, 0, 0, 0}, - {3, -16, 77, 77, -16, 3, 0, 0}, /* New 1/2 pel 6 tap filter */ - {0, -6, 50, 93, -9, 0, 0, 0}, - {1, -8, 36, 108, -11, 2, 0, 0}, /* New 1/4 pel 6 tap filter */ - {0, -1, 12, 123, -6, 0, 0, 0}, -}; - -void vp8_sixtap_predict8x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8; - uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8; - - if (xoffset == 0) { // secondpass_filter8x4_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - d27u8 = vld1_u8(src); - src += src_pixels_per_line; - d28u8 = vld1_u8(src); - src += src_pixels_per_line; - d29u8 = vld1_u8(src); - src += src_pixels_per_line; - d30u8 = vld1_u8(src); - - q3u16 = vmull_u8(d22u8, d0u8); - q4u16 = vmull_u8(d23u8, d0u8); - q5u16 = vmull_u8(d24u8, d0u8); - q6u16 = vmull_u8(d25u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d24u8, d2u8); - q4u16 = vmlal_u8(q4u16, d25u8, d2u8); - q5u16 = vmlal_u8(q5u16, d26u8, d2u8); - q6u16 = vmlal_u8(q6u16, d27u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d27u8, d5u8); - q4u16 = vmlal_u8(q4u16, d28u8, d5u8); - q5u16 = vmlal_u8(q5u16, d29u8, d5u8); - q6u16 = vmlal_u8(q6u16, d30u8, d5u8); - - q7u16 = vmull_u8(d25u8, d3u8); - q8u16 = vmull_u8(d26u8, d3u8); - q9u16 = vmull_u8(d27u8, d3u8); - q10u16 = vmull_u8(d28u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - - q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - - q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - - q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - - q7u16 = vmlal_u8(q7u16, d28u8, d2u8); - q8u16 = vmlal_u8(q8u16, d29u8, d2u8); - q9u16 = vmlal_u8(q9u16, d30u8, d2u8); - q10u16 = vmlal_u8(q10u16, d31u8, d2u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - q7u16 = vmlal_u8(q7u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - q9u16 = vmlal_u8(q9u16, d30u8, d5u8); - q10u16 = vmlal_u8(q10u16, d31u8, d5u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - - q3u16 = vmull_u8(d28u8, d3u8); - q4u16 = vmull_u8(d29u8, d3u8); - q5u16 = vmull_u8(d30u8, d3u8); - q6u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d22u8 = vqrshrun_n_s16(q7s16, 7); - d23u8 = vqrshrun_n_s16(q8s16, 7); - d24u8 = vqrshrun_n_s16(q9s16, 7); - d25u8 = vqrshrun_n_s16(q10s16, 7); - - if (yoffset == 0) { // firstpass_filter8x4_only - vst1_u8(dst_ptr, d22u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d23u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d24u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d25u8); - return; - } - - // First Pass on rest 5-line data - src += src_pixels_per_line; - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q7u8 = vld1q_u8(src); - - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); - - q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); - - q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); - - q8u16 = vmlal_u8(q8u16, d27u8, d2u8); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q11u16 = vmlal_u8(q11u16, d30u8, d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); - - q8u16 = vmlal_u8(q8u16, d27u8, d5u8); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q11u16 = vmlal_u8(q11u16, d30u8, d5u8); - q12u16 = vmlal_u8(q12u16, d31u8, d5u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); - - q3u16 = vmull_u8(d27u8, d3u8); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - - q8s16 = vqaddq_s16(q8s16, q3s16); - q9s16 = vqaddq_s16(q9s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q11s16 = vqaddq_s16(q11s16, q6s16); - q12s16 = vqaddq_s16(q12s16, q7s16); - - d26u8 = vqrshrun_n_s16(q8s16, 7); - d27u8 = vqrshrun_n_s16(q9s16, 7); - d28u8 = vqrshrun_n_s16(q10s16, 7); - d29u8 = vqrshrun_n_s16(q11s16, 7); - d30u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 8x4 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - q3u16 = vmull_u8(d22u8, d0u8); - q4u16 = vmull_u8(d23u8, d0u8); - q5u16 = vmull_u8(d24u8, d0u8); - q6u16 = vmull_u8(d25u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d24u8, d2u8); - q4u16 = vmlal_u8(q4u16, d25u8, d2u8); - q5u16 = vmlal_u8(q5u16, d26u8, d2u8); - q6u16 = vmlal_u8(q6u16, d27u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d27u8, d5u8); - q4u16 = vmlal_u8(q4u16, d28u8, d5u8); - q5u16 = vmlal_u8(q5u16, d29u8, d5u8); - q6u16 = vmlal_u8(q6u16, d30u8, d5u8); - - q7u16 = vmull_u8(d25u8, d3u8); - q8u16 = vmull_u8(d26u8, d3u8); - q9u16 = vmull_u8(d27u8, d3u8); - q10u16 = vmull_u8(d28u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - return; -} - -void vp8_sixtap_predict8x8_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src, *tmpp; - unsigned char tmp[64]; - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8; - uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8; - - if (xoffset == 0) { // secondpass_filter8x8_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d18u8 = vld1_u8(src); - src += src_pixels_per_line; - d19u8 = vld1_u8(src); - src += src_pixels_per_line; - d20u8 = vld1_u8(src); - src += src_pixels_per_line; - d21u8 = vld1_u8(src); - src += src_pixels_per_line; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - d27u8 = vld1_u8(src); - src += src_pixels_per_line; - d28u8 = vld1_u8(src); - src += src_pixels_per_line; - d29u8 = vld1_u8(src); - src += src_pixels_per_line; - d30u8 = vld1_u8(src); - - for (i = 2; i > 0; i--) { - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - d23u8 = d27u8; - d24u8 = d28u8; - d25u8 = d29u8; - d26u8 = d30u8; - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - dst_ptr += dst_pitch; - } - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - - tmpp = tmp; - for (i = 2; i > 0; i--) { - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - __builtin_prefetch(src + src_pixels_per_line * 2); - - q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - - q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - - q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - - q7u16 = vmlal_u8(q7u16, d28u8, d2u8); - q8u16 = vmlal_u8(q8u16, d29u8, d2u8); - q9u16 = vmlal_u8(q9u16, d30u8, d2u8); - q10u16 = vmlal_u8(q10u16, d31u8, d2u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - q7u16 = vmlal_u8(q7u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - q9u16 = vmlal_u8(q9u16, d30u8, d5u8); - q10u16 = vmlal_u8(q10u16, d31u8, d5u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - - q3u16 = vmull_u8(d28u8, d3u8); - q4u16 = vmull_u8(d29u8, d3u8); - q5u16 = vmull_u8(d30u8, d3u8); - q6u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d22u8 = vqrshrun_n_s16(q7s16, 7); - d23u8 = vqrshrun_n_s16(q8s16, 7); - d24u8 = vqrshrun_n_s16(q9s16, 7); - d25u8 = vqrshrun_n_s16(q10s16, 7); - - if (yoffset == 0) { // firstpass_filter8x4_only - vst1_u8(dst_ptr, d22u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d23u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d24u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d25u8); - dst_ptr += dst_pitch; - } else { - vst1_u8(tmpp, d22u8); - tmpp += 8; - vst1_u8(tmpp, d23u8); - tmpp += 8; - vst1_u8(tmpp, d24u8); - tmpp += 8; - vst1_u8(tmpp, d25u8); - tmpp += 8; - } - } - if (yoffset == 0) - return; - - // First Pass on rest 5-line data - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q7u8 = vld1q_u8(src); - - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); - - q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); - - q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); - - q8u16 = vmlal_u8(q8u16, d27u8, d2u8); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q11u16 = vmlal_u8(q11u16, d30u8, d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); - - q8u16 = vmlal_u8(q8u16, d27u8, d5u8); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q11u16 = vmlal_u8(q11u16, d30u8, d5u8); - q12u16 = vmlal_u8(q12u16, d31u8, d5u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); - - q3u16 = vmull_u8(d27u8, d3u8); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - - q8s16 = vqaddq_s16(q8s16, q3s16); - q9s16 = vqaddq_s16(q9s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q11s16 = vqaddq_s16(q11s16, q6s16); - q12s16 = vqaddq_s16(q12s16, q7s16); - - d26u8 = vqrshrun_n_s16(q8s16, 7); - d27u8 = vqrshrun_n_s16(q9s16, 7); - d28u8 = vqrshrun_n_s16(q10s16, 7); - d29u8 = vqrshrun_n_s16(q11s16, 7); - d30u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 8x8 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - tmpp = tmp; - q9u8 = vld1q_u8(tmpp); - tmpp += 16; - q10u8 = vld1q_u8(tmpp); - tmpp += 16; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - q12u8 = vld1q_u8(tmpp); - - d18u8 = vget_low_u8(q9u8); - d19u8 = vget_high_u8(q9u8); - d20u8 = vget_low_u8(q10u8); - d21u8 = vget_high_u8(q10u8); - d22u8 = vget_low_u8(q11u8); - d23u8 = vget_high_u8(q11u8); - d24u8 = vget_low_u8(q12u8); - d25u8 = vget_high_u8(q12u8); - - for (i = 2; i > 0; i--) { - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - d23u8 = d27u8; - d24u8 = d28u8; - d25u8 = d29u8; - d26u8 = d30u8; - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - dst_ptr += dst_pitch; - } - return; -} - -void vp8_sixtap_predict16x16_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src, *src_tmp, *dst, *tmpp; - unsigned char tmp[336]; - int i, j; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8; - uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8; - uint8x8_t d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint8x16_t q3u8, q4u8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16; - uint16x8_t q11u16, q12u16, q13u16, q15u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16; - int16x8_t q11s16, q12s16, q13s16, q15s16; - - if (xoffset == 0) { // secondpass_filter8x8_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src_tmp = src_ptr - src_pixels_per_line * 2; - for (i = 0; i < 2; i++) { - src = src_tmp + i * 8; - dst = dst_ptr + i * 8; - d18u8 = vld1_u8(src); - src += src_pixels_per_line; - d19u8 = vld1_u8(src); - src += src_pixels_per_line; - d20u8 = vld1_u8(src); - src += src_pixels_per_line; - d21u8 = vld1_u8(src); - src += src_pixels_per_line; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - for (j = 0; j < 4; j++) { - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - - vst1_u8(dst, d6u8); - dst += dst_pitch; - vst1_u8(dst, d7u8); - dst += dst_pitch; - vst1_u8(dst, d8u8); - dst += dst_pitch; - vst1_u8(dst, d9u8); - dst += dst_pitch; - } - } - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) { // firstpass_filter4x4_only - src = src_ptr - 2; - dst = dst_ptr; - for (i = 0; i < 8; i++) { - d6u8 = vld1_u8(src); - d7u8 = vld1_u8(src + 8); - d8u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d9u8 = vld1_u8(src); - d10u8 = vld1_u8(src + 8); - d11u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - - q6u16 = vmull_u8(d6u8, d0u8); - q7u16 = vmull_u8(d7u8, d0u8); - q8u16 = vmull_u8(d9u8, d0u8); - q9u16 = vmull_u8(d10u8, d0u8); - - d20u8 = vext_u8(d6u8, d7u8, 1); - d21u8 = vext_u8(d9u8, d10u8, 1); - d22u8 = vext_u8(d7u8, d8u8, 1); - d23u8 = vext_u8(d10u8, d11u8, 1); - d24u8 = vext_u8(d6u8, d7u8, 4); - d25u8 = vext_u8(d9u8, d10u8, 4); - d26u8 = vext_u8(d7u8, d8u8, 4); - d27u8 = vext_u8(d10u8, d11u8, 4); - d28u8 = vext_u8(d6u8, d7u8, 5); - d29u8 = vext_u8(d9u8, d10u8, 5); - - q6u16 = vmlsl_u8(q6u16, d20u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d21u8, d1u8); - q7u16 = vmlsl_u8(q7u16, d22u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d25u8, d4u8); - q7u16 = vmlsl_u8(q7u16, d26u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d27u8, d4u8); - q6u16 = vmlal_u8(q6u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - - d20u8 = vext_u8(d7u8, d8u8, 5); - d21u8 = vext_u8(d10u8, d11u8, 5); - d22u8 = vext_u8(d6u8, d7u8, 2); - d23u8 = vext_u8(d9u8, d10u8, 2); - d24u8 = vext_u8(d7u8, d8u8, 2); - d25u8 = vext_u8(d10u8, d11u8, 2); - d26u8 = vext_u8(d6u8, d7u8, 3); - d27u8 = vext_u8(d9u8, d10u8, 3); - d28u8 = vext_u8(d7u8, d8u8, 3); - d29u8 = vext_u8(d10u8, d11u8, 3); - - q7u16 = vmlal_u8(q7u16, d20u8, d5u8); - q9u16 = vmlal_u8(q9u16, d21u8, d5u8); - q6u16 = vmlal_u8(q6u16, d22u8, d2u8); - q8u16 = vmlal_u8(q8u16, d23u8, d2u8); - q7u16 = vmlal_u8(q7u16, d24u8, d2u8); - q9u16 = vmlal_u8(q9u16, d25u8, d2u8); - - q10u16 = vmull_u8(d26u8, d3u8); - q11u16 = vmull_u8(d27u8, d3u8); - q12u16 = vmull_u8(d28u8, d3u8); - q15u16 = vmull_u8(d29u8, d3u8); - - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q15s16 = vreinterpretq_s16_u16(q15u16); - - q6s16 = vqaddq_s16(q6s16, q10s16); - q8s16 = vqaddq_s16(q8s16, q11s16); - q7s16 = vqaddq_s16(q7s16, q12s16); - q9s16 = vqaddq_s16(q9s16, q15s16); - - d6u8 = vqrshrun_n_s16(q6s16, 7); - d7u8 = vqrshrun_n_s16(q7s16, 7); - d8u8 = vqrshrun_n_s16(q8s16, 7); - d9u8 = vqrshrun_n_s16(q9s16, 7); - - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - vst1q_u8(dst, q3u8); - dst += dst_pitch; - vst1q_u8(dst, q4u8); - dst += dst_pitch; - } - return; - } - - src = src_ptr - 2 - src_pixels_per_line * 2; - tmpp = tmp; - for (i = 0; i < 7; i++) { - d6u8 = vld1_u8(src); - d7u8 = vld1_u8(src + 8); - d8u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d9u8 = vld1_u8(src); - d10u8 = vld1_u8(src + 8); - d11u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d12u8 = vld1_u8(src); - d13u8 = vld1_u8(src + 8); - d14u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - __builtin_prefetch(src + src_pixels_per_line * 2); - - q8u16 = vmull_u8(d6u8, d0u8); - q9u16 = vmull_u8(d7u8, d0u8); - q10u16 = vmull_u8(d9u8, d0u8); - q11u16 = vmull_u8(d10u8, d0u8); - q12u16 = vmull_u8(d12u8, d0u8); - q13u16 = vmull_u8(d13u8, d0u8); - - d28u8 = vext_u8(d6u8, d7u8, 1); - d29u8 = vext_u8(d9u8, d10u8, 1); - d30u8 = vext_u8(d12u8, d13u8, 1); - q8u16 = vmlsl_u8(q8u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d30u8, d1u8); - d28u8 = vext_u8(d7u8, d8u8, 1); - d29u8 = vext_u8(d10u8, d11u8, 1); - d30u8 = vext_u8(d13u8, d14u8, 1); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d29u8, d1u8); - q13u16 = vmlsl_u8(q13u16, d30u8, d1u8); - - d28u8 = vext_u8(d6u8, d7u8, 4); - d29u8 = vext_u8(d9u8, d10u8, 4); - d30u8 = vext_u8(d12u8, d13u8, 4); - q8u16 = vmlsl_u8(q8u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d30u8, d4u8); - d28u8 = vext_u8(d7u8, d8u8, 4); - d29u8 = vext_u8(d10u8, d11u8, 4); - d30u8 = vext_u8(d13u8, d14u8, 4); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d29u8, d4u8); - q13u16 = vmlsl_u8(q13u16, d30u8, d4u8); - - d28u8 = vext_u8(d6u8, d7u8, 5); - d29u8 = vext_u8(d9u8, d10u8, 5); - d30u8 = vext_u8(d12u8, d13u8, 5); - q8u16 = vmlal_u8(q8u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q12u16 = vmlal_u8(q12u16, d30u8, d5u8); - d28u8 = vext_u8(d7u8, d8u8, 5); - d29u8 = vext_u8(d10u8, d11u8, 5); - d30u8 = vext_u8(d13u8, d14u8, 5); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q11u16 = vmlal_u8(q11u16, d29u8, d5u8); - q13u16 = vmlal_u8(q13u16, d30u8, d5u8); - - d28u8 = vext_u8(d6u8, d7u8, 2); - d29u8 = vext_u8(d9u8, d10u8, 2); - d30u8 = vext_u8(d12u8, d13u8, 2); - q8u16 = vmlal_u8(q8u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q12u16 = vmlal_u8(q12u16, d30u8, d2u8); - d28u8 = vext_u8(d7u8, d8u8, 2); - d29u8 = vext_u8(d10u8, d11u8, 2); - d30u8 = vext_u8(d13u8, d14u8, 2); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q11u16 = vmlal_u8(q11u16, d29u8, d2u8); - q13u16 = vmlal_u8(q13u16, d30u8, d2u8); - - d28u8 = vext_u8(d6u8, d7u8, 3); - d29u8 = vext_u8(d9u8, d10u8, 3); - d30u8 = vext_u8(d12u8, d13u8, 3); - d15u8 = vext_u8(d7u8, d8u8, 3); - d31u8 = vext_u8(d10u8, d11u8, 3); - d6u8 = vext_u8(d13u8, d14u8, 3); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q12s16 = vqaddq_s16(q12s16, q6s16); - - q6u16 = vmull_u8(d15u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - q3u16 = vmull_u8(d6u8, d3u8); - q3s16 = vreinterpretq_s16_u16(q3u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q13s16 = vreinterpretq_s16_u16(q13u16); - q9s16 = vqaddq_s16(q9s16, q6s16); - q11s16 = vqaddq_s16(q11s16, q7s16); - q13s16 = vqaddq_s16(q13s16, q3s16); - - d6u8 = vqrshrun_n_s16(q8s16, 7); - d7u8 = vqrshrun_n_s16(q9s16, 7); - d8u8 = vqrshrun_n_s16(q10s16, 7); - d9u8 = vqrshrun_n_s16(q11s16, 7); - d10u8 = vqrshrun_n_s16(q12s16, 7); - d11u8 = vqrshrun_n_s16(q13s16, 7); - - vst1_u8(tmpp, d6u8); - tmpp += 8; - vst1_u8(tmpp, d7u8); - tmpp += 8; - vst1_u8(tmpp, d8u8); - tmpp += 8; - vst1_u8(tmpp, d9u8); - tmpp += 8; - vst1_u8(tmpp, d10u8); - tmpp += 8; - vst1_u8(tmpp, d11u8); - tmpp += 8; - } - - // Second pass: 16x16 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - for (i = 0; i < 2; i++) { - dst = dst_ptr + 8 * i; - tmpp = tmp + 8 * i; - d18u8 = vld1_u8(tmpp); - tmpp += 16; - d19u8 = vld1_u8(tmpp); - tmpp += 16; - d20u8 = vld1_u8(tmpp); - tmpp += 16; - d21u8 = vld1_u8(tmpp); - tmpp += 16; - d22u8 = vld1_u8(tmpp); - tmpp += 16; - for (j = 0; j < 4; j++) { - d23u8 = vld1_u8(tmpp); - tmpp += 16; - d24u8 = vld1_u8(tmpp); - tmpp += 16; - d25u8 = vld1_u8(tmpp); - tmpp += 16; - d26u8 = vld1_u8(tmpp); - tmpp += 16; - - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - - vst1_u8(dst, d6u8); - dst += dst_pitch; - vst1_u8(dst, d7u8); - dst += dst_pitch; - vst1_u8(dst, d8u8); - dst += dst_pitch; - vst1_u8(dst, d9u8); - dst += dst_pitch; - } - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c deleted file mode 100644 index 9d6807af71..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vpx_config.h" -#include "vpx_ports/arm.h" - -static INLINE void vp8_loop_filter_neon( - uint8x16_t qblimit, // flimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p3 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r) { // q1 - uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q2s16, q11s16; - uint16x8_t q4u16; - int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8; - int8x8_t d2s8, d3s8; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q3 = vabdq_u8(q9, q8); - q4 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q3 = vmaxq_u8(q3, q4); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q9 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q3); - - q2u8 = vabdq_u8(q5, q8); - q9 = vqaddq_u8(q9, q9); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - // vp8_filter() function - // convert to signed - q10 = vdupq_n_u8(0x80); - q8 = veorq_u8(q8, q10); - q7 = veorq_u8(q7, q10); - q6 = veorq_u8(q6, q10); - q5 = veorq_u8(q5, q10); - - q2u8 = vshrq_n_u8(q2u8, 1); - q9 = vqaddq_u8(q9, q2u8); - - q10 = vdupq_n_u8(3); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q9 = vcgeq_u8(qblimit, q9); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q14u8 = vorrq_u8(q13u8, q14u8); - - q4u16 = vmovl_u8(vget_low_u8(q10)); - q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); - q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); - - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); - q15u8 = vandq_u8(q15u8, q9); - - q1s8 = vreinterpretq_s8_u8(q1u8); - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); - - q9 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2s8 = vqmovn_s16(q2s16); - d3s8 = vqmovn_s16(q11s16); - q1s8 = vcombine_s8(d2s8, d3s8); - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); - q1s8 = vreinterpretq_s8_u8(q1u8); - - q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10)); - q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); - q2s8 = vshrq_n_s8(q2s8, 3); - q1s8 = vshrq_n_s8(q1s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); - - q1s8 = vrshrq_n_s8(q1s8, 1); - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); - q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); - - q0u8 = vdupq_n_u8(0x80); - *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8); - *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8); - return; -} - -void vp8_loop_filter_horizontal_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - src -= (pitch << 2); - - q3 = vld1q_u8(src); - src += pitch; - q4 = vld1q_u8(src); - src += pitch; - q5 = vld1q_u8(src); - src += pitch; - q6 = vld1q_u8(src); - src += pitch; - q7 = vld1q_u8(src); - src += pitch; - q8 = vld1q_u8(src); - src += pitch; - q9 = vld1q_u8(src); - src += pitch; - q10 = vld1q_u8(src); - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - src -= (pitch * 5); - vst1q_u8(src, q5); - src += pitch; - vst1q_u8(src, q6); - src += pitch; - vst1q_u8(src, q7); - src += pitch; - vst1q_u8(src, q8); - return; -} - -void vp8_loop_filter_horizontal_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - u -= (pitch << 2); - v -= (pitch << 2); - - d6 = vld1_u8(u); - u += pitch; - d7 = vld1_u8(v); - v += pitch; - d8 = vld1_u8(u); - u += pitch; - d9 = vld1_u8(v); - v += pitch; - d10 = vld1_u8(u); - u += pitch; - d11 = vld1_u8(v); - v += pitch; - d12 = vld1_u8(u); - u += pitch; - d13 = vld1_u8(v); - v += pitch; - d14 = vld1_u8(u); - u += pitch; - d15 = vld1_u8(v); - v += pitch; - d16 = vld1_u8(u); - u += pitch; - d17 = vld1_u8(v); - v += pitch; - d18 = vld1_u8(u); - u += pitch; - d19 = vld1_u8(v); - v += pitch; - d20 = vld1_u8(u); - d21 = vld1_u8(v); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - u -= (pitch * 5); - vst1_u8(u, vget_low_u8(q5)); - u += pitch; - vst1_u8(u, vget_low_u8(q6)); - u += pitch; - vst1_u8(u, vget_low_u8(q7)); - u += pitch; - vst1_u8(u, vget_low_u8(q8)); - - v -= (pitch * 5); - vst1_u8(v, vget_high_u8(q5)); - v += pitch; - vst1_u8(v, vget_high_u8(q6)); - v += pitch; - vst1_u8(v, vget_high_u8(q7)); - v += pitch; - vst1_u8(v, vget_high_u8(q8)); - return; -} - -static INLINE void write_4x8(unsigned char *dst, int pitch, - const uint8x8x4_t result) { -#ifdef VPX_INCOMPATIBLE_GCC - /* - * uint8x8x4_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - 20 21 22 23 | 24 25 26 27 - 30 31 32 33 | 34 35 36 37 - --- - * after vtrn_u16 - 00 01 20 21 | 04 05 24 25 - 02 03 22 23 | 06 07 26 27 - 10 11 30 31 | 14 15 34 35 - 12 13 32 33 | 16 17 36 37 - --- - * after vtrn_u8 - 00 10 20 30 | 04 14 24 34 - 01 11 21 31 | 05 15 25 35 - 02 12 22 32 | 06 16 26 36 - 03 13 23 33 | 07 17 27 37 - */ - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), - vreinterpret_u16_u8(result.val[2])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), - vreinterpret_u16_u8(result.val[3])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); - const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); - const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); - const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); - vst1_lane_u32((uint32_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 1); -#else - vst4_lane_u8(dst, result, 0); - dst += pitch; - vst4_lane_u8(dst, result, 1); - dst += pitch; - vst4_lane_u8(dst, result, 2); - dst += pitch; - vst4_lane_u8(dst, result, 3); - dst += pitch; - vst4_lane_u8(dst, result, 4); - dst += pitch; - vst4_lane_u8(dst, result, 5); - dst += pitch; - vst4_lane_u8(dst, result, 6); - dst += pitch; - vst4_lane_u8(dst, result, 7); -#endif // VPX_INCOMPATIBLE_GCC -} - -void vp8_loop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - unsigned char *s, *d; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - uint8x8x4_t q4ResultH, q4ResultL; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - s = src - 4; - d6 = vld1_u8(s); - s += pitch; - d8 = vld1_u8(s); - s += pitch; - d10 = vld1_u8(s); - s += pitch; - d12 = vld1_u8(s); - s += pitch; - d14 = vld1_u8(s); - s += pitch; - d16 = vld1_u8(s); - s += pitch; - d18 = vld1_u8(s); - s += pitch; - d20 = vld1_u8(s); - s += pitch; - d7 = vld1_u8(s); - s += pitch; - d9 = vld1_u8(s); - s += pitch; - d11 = vld1_u8(s); - s += pitch; - d13 = vld1_u8(s); - s += pitch; - d15 = vld1_u8(s); - s += pitch; - d17 = vld1_u8(s); - s += pitch; - d19 = vld1_u8(s); - s += pitch; - d21 = vld1_u8(s); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - q4ResultL.val[0] = vget_low_u8(q5); // d10 - q4ResultL.val[1] = vget_low_u8(q6); // d12 - q4ResultL.val[2] = vget_low_u8(q7); // d14 - q4ResultL.val[3] = vget_low_u8(q8); // d16 - q4ResultH.val[0] = vget_high_u8(q5); // d11 - q4ResultH.val[1] = vget_high_u8(q6); // d13 - q4ResultH.val[2] = vget_high_u8(q7); // d15 - q4ResultH.val[3] = vget_high_u8(q8); // d17 - - d = src - 2; - write_4x8(d, pitch, q4ResultL); - d += pitch * 8; - write_4x8(d, pitch, q4ResultH); -} - -void vp8_loop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - unsigned char *us, *ud; - unsigned char *vs, *vd; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - uint8x8x4_t q4ResultH, q4ResultL; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - us = u - 4; - d6 = vld1_u8(us); - us += pitch; - d8 = vld1_u8(us); - us += pitch; - d10 = vld1_u8(us); - us += pitch; - d12 = vld1_u8(us); - us += pitch; - d14 = vld1_u8(us); - us += pitch; - d16 = vld1_u8(us); - us += pitch; - d18 = vld1_u8(us); - us += pitch; - d20 = vld1_u8(us); - - vs = v - 4; - d7 = vld1_u8(vs); - vs += pitch; - d9 = vld1_u8(vs); - vs += pitch; - d11 = vld1_u8(vs); - vs += pitch; - d13 = vld1_u8(vs); - vs += pitch; - d15 = vld1_u8(vs); - vs += pitch; - d17 = vld1_u8(vs); - vs += pitch; - d19 = vld1_u8(vs); - vs += pitch; - d21 = vld1_u8(vs); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - q4ResultL.val[0] = vget_low_u8(q5); // d10 - q4ResultL.val[1] = vget_low_u8(q6); // d12 - q4ResultL.val[2] = vget_low_u8(q7); // d14 - q4ResultL.val[3] = vget_low_u8(q8); // d16 - ud = u - 2; - write_4x8(ud, pitch, q4ResultL); - - q4ResultH.val[0] = vget_high_u8(q5); // d11 - q4ResultH.val[1] = vget_high_u8(q6); // d13 - q4ResultH.val[2] = vget_high_u8(q7); // d15 - q4ResultH.val[3] = vget_high_u8(q8); // d17 - vd = v - 2; - write_4x8(vd, pitch, q4ResultH); -} diff --git a/thirdparty/libvpx/vp8/common/blockd.c b/thirdparty/libvpx/vp8/common/blockd.c deleted file mode 100644 index 1fc3cd0ca7..0000000000 --- a/thirdparty/libvpx/vp8/common/blockd.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "blockd.h" -#include "vpx_mem/vpx_mem.h" - -const unsigned char vp8_block2left[25] = -{ - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; -const unsigned char vp8_block2above[25] = -{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 -}; diff --git a/thirdparty/libvpx/vp8/common/blockd.h b/thirdparty/libvpx/vp8/common/blockd.h deleted file mode 100644 index 192108a06d..0000000000 --- a/thirdparty/libvpx/vp8/common/blockd.h +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_BLOCKD_H_ -#define VP8_COMMON_BLOCKD_H_ - -void vpx_log(const char *format, ...); - -#include "vpx_config.h" -#include "vpx_scale/yv12config.h" -#include "mv.h" -#include "treecoder.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*#define DCPRED 1*/ -#define DCPREDSIMTHRESH 0 -#define DCPREDCNTTHRESH 3 - -#define MB_FEATURE_TREE_PROBS 3 -#define MAX_MB_SEGMENTS 4 - -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 4 - -/* Segment Feature Masks */ -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 - -typedef struct -{ - int r, c; -} POS; - -#define PLANE_TYPE_Y_NO_DC 0 -#define PLANE_TYPE_Y2 1 -#define PLANE_TYPE_UV 2 -#define PLANE_TYPE_Y_WITH_DC 3 - - -typedef char ENTROPY_CONTEXT; -typedef struct -{ - ENTROPY_CONTEXT y1[4]; - ENTROPY_CONTEXT u[2]; - ENTROPY_CONTEXT v[2]; - ENTROPY_CONTEXT y2; -} ENTROPY_CONTEXT_PLANES; - -extern const unsigned char vp8_block2left[25]; -extern const unsigned char vp8_block2above[25]; - -#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ - Dest = (A)+(B); - - -typedef enum -{ - KEY_FRAME = 0, - INTER_FRAME = 1 -} FRAME_TYPE; - -typedef enum -{ - DC_PRED, /* average of above and left pixels */ - V_PRED, /* vertical prediction */ - H_PRED, /* horizontal prediction */ - TM_PRED, /* Truemotion prediction */ - B_PRED, /* block based prediction, each block has its own prediction mode */ - - NEARESTMV, - NEARMV, - ZEROMV, - NEWMV, - SPLITMV, - - MB_MODE_COUNT -} MB_PREDICTION_MODE; - -/* Macroblock level features */ -typedef enum -{ - MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */ - MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ - MB_LVL_MAX = 2 /* Number of MB level features supported */ - -} MB_LVL_FEATURES; - -/* Segment Feature Masks */ -#define SEGMENT_ALTQ 0x01 -#define SEGMENT_ALT_LF 0x02 - -#define VP8_YMODES (B_PRED + 1) -#define VP8_UV_MODES (TM_PRED + 1) - -#define VP8_MVREFS (1 + SPLITMV - NEARESTMV) - -typedef enum -{ - B_DC_PRED, /* average of above and left pixels */ - B_TM_PRED, - - B_VE_PRED, /* vertical prediction */ - B_HE_PRED, /* horizontal prediction */ - - B_LD_PRED, - B_RD_PRED, - - B_VR_PRED, - B_VL_PRED, - B_HD_PRED, - B_HU_PRED, - - LEFT4X4, - ABOVE4X4, - ZERO4X4, - NEW4X4, - - B_MODE_COUNT -} B_PREDICTION_MODE; - -#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */ -#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4) - -/* For keyframes, intra block modes are predicted by the (already decoded) - modes for the Y blocks to the left and above us; for interframes, there - is a single probability table. */ - -union b_mode_info -{ - B_PREDICTION_MODE as_mode; - int_mv mv; -}; - -typedef enum -{ - INTRA_FRAME = 0, - LAST_FRAME = 1, - GOLDEN_FRAME = 2, - ALTREF_FRAME = 3, - MAX_REF_FRAMES = 4 -} MV_REFERENCE_FRAME; - -typedef struct -{ - uint8_t mode, uv_mode; - uint8_t ref_frame; - uint8_t is_4x4; - int_mv mv; - - uint8_t partitioning; - uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ - uint8_t need_to_clamp_mvs; - uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ -} MB_MODE_INFO; - -typedef struct modeinfo -{ - MB_MODE_INFO mbmi; - union b_mode_info bmi[16]; -} MODE_INFO; - -#if CONFIG_MULTI_RES_ENCODING -/* The mb-level information needed to be stored for higher-resolution encoder */ -typedef struct -{ - MB_PREDICTION_MODE mode; - MV_REFERENCE_FRAME ref_frame; - int_mv mv; - int dissim; /* dissimilarity level of the macroblock */ -} LOWER_RES_MB_INFO; - -/* The frame-level information needed to be stored for higher-resolution - * encoder */ -typedef struct -{ - FRAME_TYPE frame_type; - int is_frame_dropped; - // The frame rate for the lowest resolution. - double low_res_framerate; - /* The frame number of each reference frames */ - unsigned int low_res_ref_frames[MAX_REF_FRAMES]; - // The video frame counter value for the key frame, for lowest resolution. - unsigned int key_frame_counter_value; - LOWER_RES_MB_INFO *mb_info; -} LOWER_RES_FRAME_INFO; -#endif - -typedef struct blockd -{ - short *qcoeff; - short *dqcoeff; - unsigned char *predictor; - short *dequant; - - int offset; - char *eob; - - union b_mode_info bmi; -} BLOCKD; - -typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -typedef struct macroblockd -{ - DECLARE_ALIGNED(16, unsigned char, predictor[384]); - DECLARE_ALIGNED(16, short, qcoeff[400]); - DECLARE_ALIGNED(16, short, dqcoeff[400]); - DECLARE_ALIGNED(16, char, eobs[25]); - - DECLARE_ALIGNED(16, short, dequant_y1[16]); - DECLARE_ALIGNED(16, short, dequant_y1_dc[16]); - DECLARE_ALIGNED(16, short, dequant_y2[16]); - DECLARE_ALIGNED(16, short, dequant_uv[16]); - - /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ - BLOCKD block[25]; - int fullpixel_mask; - - YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ - YV12_BUFFER_CONFIG dst; - - MODE_INFO *mode_info_context; - int mode_info_stride; - - FRAME_TYPE frame_type; - - int up_available; - int left_available; - - unsigned char *recon_above[3]; - unsigned char *recon_left[3]; - int recon_left_stride[2]; - - /* Y,U,V,Y2 */ - ENTROPY_CONTEXT_PLANES *above_context; - ENTROPY_CONTEXT_PLANES *left_context; - - /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */ - unsigned char segmentation_enabled; - - /* 0 (do not update) 1 (update) the macroblock segmentation map. */ - unsigned char update_mb_segmentation_map; - - /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ - unsigned char update_mb_segmentation_data; - - /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ - unsigned char mb_segement_abs_delta; - - /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */ - /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ - vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */ - - signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */ - - /* mode_based Loop filter adjustment */ - unsigned char mode_ref_lf_delta_enabled; - unsigned char mode_ref_lf_delta_update; - - /* Delta values have the range +/- MAX_LOOP_FILTER */ - signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ - signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ - signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ - signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ - - /* Distance of MB away from frame edges */ - int mb_to_left_edge; - int mb_to_right_edge; - int mb_to_top_edge; - int mb_to_bottom_edge; - - - - vp8_subpix_fn_t subpixel_predict; - vp8_subpix_fn_t subpixel_predict8x4; - vp8_subpix_fn_t subpixel_predict8x8; - vp8_subpix_fn_t subpixel_predict16x16; - - void *current_bc; - - int corrupted; - -#if ARCH_X86 || ARCH_X86_64 - /* This is an intermediate buffer currently used in sub-pixel motion search - * to keep a copy of the reference area. This buffer can be used for other - * purpose. - */ - DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]); -#endif -} MACROBLOCKD; - - -extern void vp8_build_block_doffsets(MACROBLOCKD *x); -extern void vp8_setup_block_dptrs(MACROBLOCKD *x); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp8/common/coefupdateprobs.h b/thirdparty/libvpx/vp8/common/coefupdateprobs.h deleted file mode 100644 index d96a19e747..0000000000 --- a/thirdparty/libvpx/vp8/common/coefupdateprobs.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_COEFUPDATEPROBS_H_ -#define VP8_COMMON_COEFUPDATEPROBS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Update probabilities for the nodes in the token entropy tree. - Generated file included by entropy.c */ - -const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = -{ - { - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, }, - {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, }, - {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, }, - {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_COEFUPDATEPROBS_H_ diff --git a/thirdparty/libvpx/vp8/common/common.h b/thirdparty/libvpx/vp8/common/common.h deleted file mode 100644 index e58a9cc23b..0000000000 --- a/thirdparty/libvpx/vp8/common/common.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_COMMON_H_ -#define VP8_COMMON_COMMON_H_ - -#include - -/* Interface header for common constant data structures and lookup tables */ - -#include "vpx_mem/vpx_mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Only need this for fixed-size arrays, for structs just assign. */ - -#define vp8_copy( Dest, Src) { \ - assert( sizeof( Dest) == sizeof( Src)); \ - memcpy( Dest, Src, sizeof( Src)); \ - } - -/* Use this for variably-sized arrays. */ - -#define vp8_copy_array( Dest, Src, N) { \ - assert( sizeof( *Dest) == sizeof( *Src)); \ - memcpy( Dest, Src, N * sizeof( *Src)); \ - } - -#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest)); - -#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest)); - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_COMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/copy_c.c b/thirdparty/libvpx/vp8/common/copy_c.c deleted file mode 100644 index e3392913f6..0000000000 --- a/thirdparty/libvpx/vp8/common/copy_c.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include - -#include "./vp8_rtcd.h" -#include "vpx/vpx_integer.h" - -/* Copy 2 macroblocks to a buffer */ -void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride, - unsigned char *dst_ptr, int dst_stride, - int height) -{ - int r; - - for (r = 0; r < height; r++) - { - memcpy(dst_ptr, src_ptr, 32); - - src_ptr += src_stride; - dst_ptr += dst_stride; - - } -} diff --git a/thirdparty/libvpx/vp8/common/debugmodes.c b/thirdparty/libvpx/vp8/common/debugmodes.c deleted file mode 100644 index 159fddc6a7..0000000000 --- a/thirdparty/libvpx/vp8/common/debugmodes.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include -#include "blockd.h" - - -void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame) -{ - - int mb_row; - int mb_col; - int mb_index = 0; - FILE *mvs = fopen("mvs.stt", "a"); - - /* print out the macroblock Y modes */ - mb_index = 0; - fprintf(mvs, "Mb Modes for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - - fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode); - - mb_index++; - } - - fprintf(mvs, "\n"); - mb_index++; - } - - fprintf(mvs, "\n"); - - mb_index = 0; - fprintf(mvs, "Mb mv ref for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - - fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame); - - mb_index++; - } - - fprintf(mvs, "\n"); - mb_index++; - } - - fprintf(mvs, "\n"); - - /* print out the macroblock UV modes */ - mb_index = 0; - fprintf(mvs, "UV Modes for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - - fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode); - - mb_index++; - } - - mb_index++; - fprintf(mvs, "\n"); - } - - fprintf(mvs, "\n"); - - /* print out the block modes */ - fprintf(mvs, "Mbs for Frame %d\n", frame); - { - int b_row; - - for (b_row = 0; b_row < 4 * rows; b_row++) - { - int b_col; - int bindex; - - for (b_col = 0; b_col < 4 * cols; b_col++) - { - mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); - bindex = (b_row & 3) * 4 + (b_col & 3); - - if (mi[mb_index].mbmi.mode == B_PRED) - fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode); - else - fprintf(mvs, "xx "); - - } - - fprintf(mvs, "\n"); - } - } - fprintf(mvs, "\n"); - - /* print out the macroblock mvs */ - mb_index = 0; - fprintf(mvs, "MVs for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2); - - mb_index++; - } - - mb_index++; - fprintf(mvs, "\n"); - } - - fprintf(mvs, "\n"); - - - /* print out the block modes */ - fprintf(mvs, "MVs for Frame %d\n", frame); - { - int b_row; - - for (b_row = 0; b_row < 4 * rows; b_row++) - { - int b_col; - int bindex; - - for (b_col = 0; b_col < 4 * cols; b_col++) - { - mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); - bindex = (b_row & 3) * 4 + (b_col & 3); - fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col); - - } - - fprintf(mvs, "\n"); - } - } - fprintf(mvs, "\n"); - - - fclose(mvs); -} diff --git a/thirdparty/libvpx/vp8/common/default_coef_probs.h b/thirdparty/libvpx/vp8/common/default_coef_probs.h deleted file mode 100644 index 4d69e4be66..0000000000 --- a/thirdparty/libvpx/vp8/common/default_coef_probs.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. -*/ - -#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_ -#define VP8_COMMON_DEFAULT_COEF_PROBS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/*Generated file, included by entropy.c*/ - - -static const vp8_prob default_coef_probs [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = -{ - { /* Block Type ( 0 ) */ - { /* Coeff Band ( 0 )*/ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, - { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, - { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, - { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, - { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, - { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, - { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, - { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, - { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, - { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, - { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, - { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, - { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, - { /* Block Type ( 1 ) */ - { /* Coeff Band ( 0 )*/ - { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, - { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, - { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, - { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, - { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, - { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, - { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, - { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, - { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, - { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, - { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, - { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, - { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, - { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, - { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } - } - }, - { /* Block Type ( 2 ) */ - { /* Coeff Band ( 0 )*/ - { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, - { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, - { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, - { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, - { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, - { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, - { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, - { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, - { /* Block Type ( 3 ) */ - { /* Coeff Band ( 0 )*/ - { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, - { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, - { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, - { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, - { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, - { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, - { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, - { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, - { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, - { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, - { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, - { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, - { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, - { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, - { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - } - } -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_ diff --git a/thirdparty/libvpx/vp8/common/dequantize.c b/thirdparty/libvpx/vp8/common/dequantize.c deleted file mode 100644 index f8b04fa4ee..0000000000 --- a/thirdparty/libvpx/vp8/common/dequantize.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vp8/common/blockd.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_dequantize_b_c(BLOCKD *d, short *DQC) -{ - int i; - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - - for (i = 0; i < 16; i++) - { - DQ[i] = Q[i] * DQC[i]; - } -} - -void vp8_dequant_idct_add_c(short *input, short *dq, - unsigned char *dest, int stride) -{ - int i; - - for (i = 0; i < 16; i++) - { - input[i] = dq[i] * input[i]; - } - - vp8_short_idct4x4llm_c(input, dest, stride, dest, stride); - - memset(input, 0, 32); - -} diff --git a/thirdparty/libvpx/vp8/common/entropy.c b/thirdparty/libvpx/vp8/common/entropy.c deleted file mode 100644 index c00e565f06..0000000000 --- a/thirdparty/libvpx/vp8/common/entropy.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "entropy.h" -#include "blockd.h" -#include "onyxc_int.h" -#include "vpx_mem/vpx_mem.h" - -#include "coefupdateprobs.h" - -DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = -{ - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = -{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7}; - -DECLARE_ALIGNED(16, const unsigned char, - vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = -{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0}; - -DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = -{ - 0, 1, 4, 8, - 5, 2, 3, 6, - 9, 12, 13, 10, - 7, 11, 14, 15, -}; - -DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = -{ - 1, 2, 6, 7, - 3, 5, 8, 13, - 4, 9, 12, 14, - 10, 11, 15, 16 -}; - -/* vp8_default_zig_zag_mask generated with: - - void vp8_init_scan_order_mask() - { - int i; - - for (i = 0; i < 16; i++) - { - vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; - } - - } -*/ -DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) = -{ - 1, 2, 32, 64, - 4, 16, 128, 4096, - 8, 256, 2048, 8192, - 512, 1024, 16384, -32768 -}; - -const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; - -/* Array indices are identical to previously-existing CONTEXT_NODE indices */ - -const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -{ - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ - 8, 12, /* 3 = LOW_VAL */ - -TWO_TOKEN, 10, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ -}; - -/* vp8_coef_encodings generated with: - vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); -*/ -vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = -{ - {2, 2}, - {6, 3}, - {28, 5}, - {58, 6}, - {59, 6}, - {60, 6}, - {61, 6}, - {124, 7}, - {125, 7}, - {126, 7}, - {127, 7}, - {0, 1} -}; - -/* Trees for extra bits. Probabilities are constant and - do not depend on previously encoded bits */ - -static const vp8_prob Pcat1[] = { 159}; -static const vp8_prob Pcat2[] = { 165, 145}; -static const vp8_prob Pcat3[] = { 173, 148, 140}; -static const vp8_prob Pcat4[] = { 176, 155, 140, 135}; -static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130}; -static const vp8_prob Pcat6[] = -{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129}; - - -/* tree index tables generated with: - - void init_bit_tree(vp8_tree_index *p, int n) - { - int i = 0; - - while (++i < n) - { - p[0] = p[1] = i << 1; - p += 2; - } - - p[0] = p[1] = 0; - } - - void init_bit_trees() - { - init_bit_tree(cat1, 1); - init_bit_tree(cat2, 2); - init_bit_tree(cat3, 3); - init_bit_tree(cat4, 4); - init_bit_tree(cat5, 5); - init_bit_tree(cat6, 11); - } -*/ - -static const vp8_tree_index cat1[2] = { 0, 0 }; -static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 }; -static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 }; -static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 }; -static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 }; -static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, - 14, 14, 16, 16, 18, 18, 20, 20, 0, 0 }; - -const vp8_extra_bit_struct vp8_extra_bits[12] = -{ - { 0, 0, 0, 0}, - { 0, 0, 0, 1}, - { 0, 0, 0, 2}, - { 0, 0, 0, 3}, - { 0, 0, 0, 4}, - { cat1, Pcat1, 1, 5}, - { cat2, Pcat2, 2, 7}, - { cat3, Pcat3, 3, 11}, - { cat4, Pcat4, 4, 19}, - { cat5, Pcat5, 5, 35}, - { cat6, Pcat6, 11, 67}, - { 0, 0, 0, 0} -}; - -#include "default_coef_probs.h" - -void vp8_default_coef_probs(VP8_COMMON *pc) -{ - memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs)); -} - diff --git a/thirdparty/libvpx/vp8/common/entropy.h b/thirdparty/libvpx/vp8/common/entropy.h deleted file mode 100644 index a90bab4bac..0000000000 --- a/thirdparty/libvpx/vp8/common/entropy.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ENTROPY_H_ -#define VP8_COMMON_ENTROPY_H_ - -#include "treecoder.h" -#include "blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Coefficient token alphabet */ - -#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ -#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ -#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ -#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ -#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ -#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ -#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ -#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ -#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ - -#define MAX_ENTROPY_TOKENS 12 -#define ENTROPY_NODES 11 - -extern const vp8_tree_index vp8_coef_tree[]; - -extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS]; - -typedef struct -{ - vp8_tree_p tree; - const vp8_prob *prob; - int Len; - int base_val; -} vp8_extra_bit_struct; - -extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ - -#define PROB_UPDATE_BASELINE_COST 7 - -#define MAX_PROB 255 -#define DCT_MAX_VALUE 2048 - - -/* Coefficients are predicted via a 3-dimensional probability table. */ - -/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ - -#define BLOCK_TYPES 4 - -/* Middle dimension is a coarsening of the coefficient's - position within the 4x4 DCT. */ - -#define COEF_BANDS 8 -extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); - -/* Inside dimension is 3-valued measure of nearby complexity, that is, - the extent to which nearby coefficients are nonzero. For the first - coefficient (DC, unless block type is 0), we look at the (already encoded) - blocks above and to the left of the current block. The context index is - then the number (0,1,or 2) of these blocks having nonzero coefficients. - After decoding a coefficient, the measure is roughly the size of the - most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1). - Note that the intuitive meaning of this measure changes as coefficients - are decoded, e.g., prior to the first token, a zero means that my neighbors - are empty while, after the first token, because of the use of end-of-block, - a zero means we just decoded a zero and hence guarantees that a non-zero - coefficient will appear later in this block. However, this shift - in meaning is perfectly OK because our context depends also on the - coefficient band (and since zigzag positions 0, 1, and 2 are in - distinct bands). */ - -/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ -# define PREV_COEF_CONTEXTS 3 - -extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); - -extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - - -struct VP8Common; -void vp8_default_coef_probs(struct VP8Common *); - -extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); -extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); -extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]); -extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; - -void vp8_coef_tree_initialize(void); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp8/common/entropymode.c b/thirdparty/libvpx/vp8/common/entropymode.c deleted file mode 100644 index 8981a8d3c2..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymode.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#define USE_PREBUILT_TABLES - -#include "entropymode.h" -#include "entropy.h" -#include "vpx_mem/vpx_mem.h" - -#include "vp8_entropymodedata.h" - -int vp8_mv_cont(const int_mv *l, const int_mv *a) -{ - int lez = (l->as_int == 0); - int aez = (a->as_int == 0); - int lea = (l->as_int == a->as_int); - - if (lea && lez) - return SUBMVREF_LEFT_ABOVE_ZED; - - if (lea) - return SUBMVREF_LEFT_ABOVE_SAME; - - if (aez) - return SUBMVREF_ABOVE_ZED; - - if (lez) - return SUBMVREF_LEFT_ZED; - - return SUBMVREF_NORMAL; -} - -static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25}; - -const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] = -{ - { 147, 136, 18 }, - { 106, 145, 1 }, - { 179, 121, 1 }, - { 223, 1 , 34 }, - { 208, 1 , 1 } -}; - - - -const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] = -{ - { - 0, 0, 0, 0, - 0, 0, 0, 0, - 1, 1, 1, 1, - 1, 1, 1, 1, - }, - { - 0, 0, 1, 1, - 0, 0, 1, 1, - 0, 0, 1, 1, - 0, 0, 1, 1, - }, - { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3, - }, - { - 0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15, - } -}; - -const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16}; - -const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150}; - - -/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ - -const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */ -{ - -B_DC_PRED, 2, /* 0 = DC_NODE */ - -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ - 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ -}; - -/* Again, these trees use the same probability indices as their - explicitly-programmed predecessors. */ - -const vp8_tree_index vp8_ymode_tree[8] = -{ - -DC_PRED, 2, - 4, 6, - -V_PRED, -H_PRED, - -TM_PRED, -B_PRED -}; - -const vp8_tree_index vp8_kf_ymode_tree[8] = -{ - -B_PRED, 2, - 4, 6, - -DC_PRED, -V_PRED, - -H_PRED, -TM_PRED -}; - -const vp8_tree_index vp8_uv_mode_tree[6] = -{ - -DC_PRED, 2, - -V_PRED, 4, - -H_PRED, -TM_PRED -}; - -const vp8_tree_index vp8_mbsplit_tree[6] = -{ - -3, 2, - -2, 4, - -0, -1 -}; - -const vp8_tree_index vp8_mv_ref_tree[8] = -{ - -ZEROMV, 2, - -NEARESTMV, 4, - -NEARMV, 6, - -NEWMV, -SPLITMV -}; - -const vp8_tree_index vp8_sub_mv_ref_tree[6] = -{ - -LEFT4X4, 2, - -ABOVE4X4, 4, - -ZERO4X4, -NEW4X4 -}; - -const vp8_tree_index vp8_small_mvtree [14] = -{ - 2, 8, - 4, 6, - -0, -1, - -2, -3, - 10, 12, - -4, -5, - -6, -7 -}; - -void vp8_init_mbmode_probs(VP8_COMMON *x) -{ - memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); - memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); - memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); -} - -void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) -{ - memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); -} - diff --git a/thirdparty/libvpx/vp8/common/entropymode.h b/thirdparty/libvpx/vp8/common/entropymode.h deleted file mode 100644 index 81bdfc4b8b..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymode.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ENTROPYMODE_H_ -#define VP8_COMMON_ENTROPYMODE_H_ - -#include "onyxc_int.h" -#include "treecoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum -{ - SUBMVREF_NORMAL, - SUBMVREF_LEFT_ZED, - SUBMVREF_ABOVE_ZED, - SUBMVREF_LEFT_ABOVE_SAME, - SUBMVREF_LEFT_ABOVE_ZED -} sumvfref_t; - -typedef int vp8_mbsplit[16]; - -#define VP8_NUMMBSPLITS 4 - -extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; - -extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ - -extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1]; - -extern int vp8_mv_cont(const int_mv *l, const int_mv *a); -#define SUBMVREF_COUNT 5 -extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1]; - - -extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES]; - - -extern const vp8_tree_index vp8_bmode_tree[]; - -extern const vp8_tree_index vp8_ymode_tree[]; -extern const vp8_tree_index vp8_kf_ymode_tree[]; -extern const vp8_tree_index vp8_uv_mode_tree[]; - -extern const vp8_tree_index vp8_mbsplit_tree[]; -extern const vp8_tree_index vp8_mv_ref_tree[]; -extern const vp8_tree_index vp8_sub_mv_ref_tree[]; - -extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES]; -extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES]; -extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES]; -extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES]; -extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS]; - -/* Inter mode values do not start at zero */ - -extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS]; -extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS]; - -extern const vp8_tree_index vp8_small_mvtree[]; - -extern const struct vp8_token_struct vp8_small_mvencodings[8]; - -/* Key frame default mode probs */ -extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] -[VP8_BINTRAMODES-1]; -extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; -extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; - -void vp8_init_mbmode_probs(VP8_COMMON *x); -void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); -void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp8/common/entropymv.c b/thirdparty/libvpx/vp8/common/entropymv.c deleted file mode 100644 index e5df1f0955..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymv.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "entropymv.h" - -const MV_CONTEXT vp8_mv_update_probs[2] = -{ - {{ - 237, - 246, - 253, 253, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 - }}, - {{ - 231, - 243, - 245, 253, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 - }} -}; -const MV_CONTEXT vp8_default_mv_context[2] = -{ - {{ - /* row */ - 162, /* is short */ - 128, /* sign */ - 225, 146, 172, 147, 214, 39, 156, /* short tree */ - 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ - }}, - - - - {{ - /* same for column */ - 164, /* is short */ - 128, - 204, 170, 119, 235, 140, 230, 228, - 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ - - }} -}; diff --git a/thirdparty/libvpx/vp8/common/entropymv.h b/thirdparty/libvpx/vp8/common/entropymv.h deleted file mode 100644 index 42840d58ad..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymv.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ENTROPYMV_H_ -#define VP8_COMMON_ENTROPYMV_H_ - -#include "treecoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum -{ - mv_max = 1023, /* max absolute value of a MV component */ - MVvals = (2 * mv_max) + 1, /* # possible values "" */ - mvfp_max = 255, /* max absolute value of a full pixel MV component */ - MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */ - - mvlong_width = 10, /* Large MVs have 9 bit magnitudes */ - mvnum_short = 8, /* magnitudes 0 through 7 */ - - /* probability offsets for coding each MV component */ - - mvpis_short = 0, /* short (<= 7) vs long (>= 8) */ - MVPsign, /* sign for non-zero */ - MVPshort, /* 8 short values = 7-position tree */ - - MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */ - MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */ -}; - -typedef struct mv_context -{ - vp8_prob prob[MVPcount]; /* often come in row, col pairs */ -} MV_CONTEXT; - -extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp8/common/extend.c b/thirdparty/libvpx/vp8/common/extend.c deleted file mode 100644 index 2d938ad782..0000000000 --- a/thirdparty/libvpx/vp8/common/extend.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "extend.h" -#include "vpx_mem/vpx_mem.h" - - -static void copy_and_extend_plane -( - unsigned char *s, /* source */ - int sp, /* source pitch */ - unsigned char *d, /* destination */ - int dp, /* destination pitch */ - int h, /* height */ - int w, /* width */ - int et, /* extend top border */ - int el, /* extend left border */ - int eb, /* extend bottom border */ - int er /* extend right border */ -) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - int linesize; - - /* copy the left and right most columns out */ - src_ptr1 = s; - src_ptr2 = s + w - 1; - dest_ptr1 = d - el; - dest_ptr2 = d + w; - - for (i = 0; i < h; i++) - { - memset(dest_ptr1, src_ptr1[0], el); - memcpy(dest_ptr1 + el, src_ptr1, w); - memset(dest_ptr2, src_ptr2[0], er); - src_ptr1 += sp; - src_ptr2 += sp; - dest_ptr1 += dp; - dest_ptr2 += dp; - } - - /* Now copy the top and bottom lines into each line of the respective - * borders - */ - src_ptr1 = d - el; - src_ptr2 = d + dp * (h - 1) - el; - dest_ptr1 = d + dp * (-et) - el; - dest_ptr2 = d + dp * (h) - el; - linesize = el + er + w; - - for (i = 0; i < et; i++) - { - memcpy(dest_ptr1, src_ptr1, linesize); - dest_ptr1 += dp; - } - - for (i = 0; i < eb; i++) - { - memcpy(dest_ptr2, src_ptr2, linesize); - dest_ptr2 += dp; - } -} - - -void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) -{ - int et = dst->border; - int el = dst->border; - int eb = dst->border + dst->y_height - src->y_height; - int er = dst->border + dst->y_width - src->y_width; - - copy_and_extend_plane(src->y_buffer, src->y_stride, - dst->y_buffer, dst->y_stride, - src->y_height, src->y_width, - et, el, eb, er); - - et = dst->border >> 1; - el = dst->border >> 1; - eb = (dst->border >> 1) + dst->uv_height - src->uv_height; - er = (dst->border >> 1) + dst->uv_width - src->uv_width; - - copy_and_extend_plane(src->u_buffer, src->uv_stride, - dst->u_buffer, dst->uv_stride, - src->uv_height, src->uv_width, - et, el, eb, er); - - copy_and_extend_plane(src->v_buffer, src->uv_stride, - dst->v_buffer, dst->uv_stride, - src->uv_height, src->uv_width, - et, el, eb, er); -} - - -void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int srcy, int srcx, - int srch, int srcw) -{ - int et = dst->border; - int el = dst->border; - int eb = dst->border + dst->y_height - src->y_height; - int er = dst->border + dst->y_width - src->y_width; - int src_y_offset = srcy * src->y_stride + srcx; - int dst_y_offset = srcy * dst->y_stride + srcx; - int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); - int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - - /* If the side is not touching the bounder then don't extend. */ - if (srcy) - et = 0; - if (srcx) - el = 0; - if (srcy + srch != src->y_height) - eb = 0; - if (srcx + srcw != src->y_width) - er = 0; - - copy_and_extend_plane(src->y_buffer + src_y_offset, - src->y_stride, - dst->y_buffer + dst_y_offset, - dst->y_stride, - srch, srcw, - et, el, eb, er); - - et = (et + 1) >> 1; - el = (el + 1) >> 1; - eb = (eb + 1) >> 1; - er = (er + 1) >> 1; - srch = (srch + 1) >> 1; - srcw = (srcw + 1) >> 1; - - copy_and_extend_plane(src->u_buffer + src_uv_offset, - src->uv_stride, - dst->u_buffer + dst_uv_offset, - dst->uv_stride, - srch, srcw, - et, el, eb, er); - - copy_and_extend_plane(src->v_buffer + src_uv_offset, - src->uv_stride, - dst->v_buffer + dst_uv_offset, - dst->uv_stride, - srch, srcw, - et, el, eb, er); -} - - -/* note the extension is only for the last row, for intra prediction purpose */ -void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, - unsigned char *YPtr, - unsigned char *UPtr, - unsigned char *VPtr) -{ - int i; - - YPtr += ybf->y_stride * 14; - UPtr += ybf->uv_stride * 6; - VPtr += ybf->uv_stride * 6; - - for (i = 0; i < 4; i++) - { - YPtr[i] = YPtr[-1]; - UPtr[i] = UPtr[-1]; - VPtr[i] = VPtr[-1]; - } - - YPtr += ybf->y_stride; - UPtr += ybf->uv_stride; - VPtr += ybf->uv_stride; - - for (i = 0; i < 4; i++) - { - YPtr[i] = YPtr[-1]; - UPtr[i] = UPtr[-1]; - VPtr[i] = VPtr[-1]; - } -} diff --git a/thirdparty/libvpx/vp8/common/extend.h b/thirdparty/libvpx/vp8/common/extend.h deleted file mode 100644 index 068f4ac523..0000000000 --- a/thirdparty/libvpx/vp8/common/extend.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_EXTEND_H_ -#define VP8_COMMON_EXTEND_H_ - -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr); -void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst); -void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int srcy, int srcx, - int srch, int srcw); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_EXTEND_H_ diff --git a/thirdparty/libvpx/vp8/common/filter.c b/thirdparty/libvpx/vp8/common/filter.c deleted file mode 100644 index 84c608effa..0000000000 --- a/thirdparty/libvpx/vp8/common/filter.c +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "filter.h" -#include "./vp8_rtcd.h" - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = -{ - { 128, 0 }, - { 112, 16 }, - { 96, 32 }, - { 80, 48 }, - { 64, 64 }, - { 48, 80 }, - { 32, 96 }, - { 16, 112 } -}; - -DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = -{ - - { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ - { 0, -6, 123, 12, -1, 0 }, - { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ - { 0, -9, 93, 50, -6, 0 }, - { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ - { 0, -6, 50, 93, -9, 0 }, - { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ - { 0, -1, 12, 123, -6, 0 }, -}; - -static void filter_block2d_first_pass -( - unsigned char *src_ptr, - int *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + - ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - - /* Normalize back to 0-255 */ - Temp = Temp >> VP8_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = Temp; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -static void filter_block2d_second_pass -( - int *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - /* Apply filter */ - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + - ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - - /* Normalize back to 0-255 */ - Temp = Temp >> VP8_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = (unsigned char)Temp; - src_ptr++; - } - - /* Start next row */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_pitch; - } -} - - -static void filter_block2d -( - unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - int output_pitch, - const short *HFilter, - const short *VFilter -) -{ - int FData[9*4]; /* Temp data buffer used in filtering */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); - - /* then filter verticaly... */ - filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); -} - - -void vp8_sixtap_predict4x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); -} -void vp8_sixtap_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); - - - /* then filter verticaly... */ - filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); - -} - -void vp8_sixtap_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); - - - /* then filter verticaly... */ - filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); - -} - -void vp8_sixtap_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[21*24]; /* Temp data buffer used in filtering */ - - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); - - /* then filter verticaly... */ - filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); - -} - - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_stride : Stride of source block. - * UINT32 height : Block height. - * UINT32 width : Block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block - * in the horizontal direction to produce the filtered output - * block. Used to implement first-pass of 2-D separable filter. - * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. - * Two filter taps should sum to VP8_FILTER_WEIGHT. - * - ****************************************************************************/ -static void filter_block2d_bil_first_pass -( - unsigned char *src_ptr, - unsigned short *dst_ptr, - unsigned int src_stride, - unsigned int height, - unsigned int width, - const short *vp8_filter -) -{ - unsigned int i, j; - - for (i = 0; i < height; i++) - { - for (j = 0; j < width; j++) - { - /* Apply bilinear filter */ - dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[1] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_stride - width; - dst_ptr += width; - } -} - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 dst_pitch : Destination block pitch. - * UINT32 height : Block height. - * UINT32 width : Block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block - * in the vertical direction to produce the filtered output - * block. Used to implement second-pass of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. - * Two filter taps should sum to VP8_FILTER_WEIGHT. - * - ****************************************************************************/ -static void filter_block2d_bil_second_pass -( - unsigned short *src_ptr, - unsigned char *dst_ptr, - int dst_pitch, - unsigned int height, - unsigned int width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < height; i++) - { - for (j = 0; j < width; j++) - { - /* Apply filter */ - Temp = ((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[width] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2); - dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); - src_ptr++; - } - - /* Next row... */ - dst_ptr += dst_pitch; - } -} - - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pitch : Stride of source block. - * UINT32 dst_pitch : Stride of destination block. - * INT32 *HFilter : Array of 2 horizontal filter taps. - * INT32 *VFilter : Array of 2 vertical filter taps. - * INT32 Width : Block width - * INT32 Height : Block height - * - * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : 2-D filters an input block by applying a 2-tap - * bi-linear filter horizontally followed by a 2-tap - * bi-linear filter vertically on the result. - * - * SPECIAL NOTES : The largest block size can be handled here is 16x16 - * - ****************************************************************************/ -static void filter_block2d_bil -( - unsigned char *src_ptr, - unsigned char *dst_ptr, - unsigned int src_pitch, - unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, - int Width, - int Height -) -{ - - unsigned short FData[17*16]; /* Temp data buffer used in filtering */ - - /* First filter 1-D horizontally... */ - filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); - - /* then 1-D vertically... */ - filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); -} - - -void vp8_bilinear_predict4x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; -#if 0 - { - int i; - unsigned char temp1[16]; - unsigned char temp2[16]; - - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - - for (i = 0; i < 16; i++) - { - if (temp1[i] != temp2[i]) - { - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - } - } - } -#endif - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); - -} - -void vp8_bilinear_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); - -} - -void vp8_bilinear_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); - -} - -void vp8_bilinear_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); -} diff --git a/thirdparty/libvpx/vp8/common/filter.h b/thirdparty/libvpx/vp8/common/filter.h deleted file mode 100644 index cfba775fce..0000000000 --- a/thirdparty/libvpx/vp8/common/filter.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_FILTER_H_ -#define VP8_COMMON_FILTER_H_ - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]); -extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_FILTER_H_ diff --git a/thirdparty/libvpx/vp8/common/findnearmv.c b/thirdparty/libvpx/vp8/common/findnearmv.c deleted file mode 100644 index e8ee40f56c..0000000000 --- a/thirdparty/libvpx/vp8/common/findnearmv.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "findnearmv.h" - -const unsigned char vp8_mbsplit_offset[4][16] = { - { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} -}; - -/* Predict motion vectors using those from already-decoded nearby blocks. - Note that we only consider one 4x4 subblock from each candidate 16x16 - macroblock. */ -void vp8_find_near_mvs -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv *nearest, - int_mv *nearby, - int_mv *best_mv, - int cnt[4], - int refframe, - int *ref_frame_sign_bias -) -{ - const MODE_INFO *above = here - xd->mode_info_stride; - const MODE_INFO *left = here - 1; - const MODE_INFO *aboveleft = above - 1; - int_mv near_mvs[4]; - int_mv *mv = near_mvs; - int *cntx = cnt; - enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV}; - - /* Zero accumulators */ - mv[0].as_int = mv[1].as_int = mv[2].as_int = 0; - cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; - - /* Process above */ - if (above->mbmi.ref_frame != INTRA_FRAME) - { - if (above->mbmi.mv.as_int) - { - (++mv)->as_int = above->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias); - ++cntx; - } - - *cntx += 2; - } - - /* Process left */ - if (left->mbmi.ref_frame != INTRA_FRAME) - { - if (left->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = left->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != mv->as_int) - { - (++mv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 2; - } - else - cnt[CNT_INTRA] += 2; - } - - /* Process above left */ - if (aboveleft->mbmi.ref_frame != INTRA_FRAME) - { - if (aboveleft->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = aboveleft->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != mv->as_int) - { - (++mv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 1; - } - else - cnt[CNT_INTRA] += 1; - } - - /* If we have three distinct MV's ... */ - if (cnt[CNT_SPLITMV]) - { - /* See if above-left MV can be merged with NEAREST */ - if (mv->as_int == near_mvs[CNT_NEAREST].as_int) - cnt[CNT_NEAREST] += 1; - } - - cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) - + (left->mbmi.mode == SPLITMV)) * 2 - + (aboveleft->mbmi.mode == SPLITMV); - - /* Swap near and nearest if necessary */ - if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) - { - int tmp; - tmp = cnt[CNT_NEAREST]; - cnt[CNT_NEAREST] = cnt[CNT_NEAR]; - cnt[CNT_NEAR] = tmp; - tmp = near_mvs[CNT_NEAREST].as_int; - near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; - near_mvs[CNT_NEAR].as_int = tmp; - } - - /* Use near_mvs[0] to store the "best" MV */ - if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]) - near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST]; - - /* Set up return values */ - best_mv->as_int = near_mvs[0].as_int; - nearest->as_int = near_mvs[CNT_NEAREST].as_int; - nearby->as_int = near_mvs[CNT_NEAR].as_int; -} - - -static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd) -{ - inv->as_mv.row = src->as_mv.row * -1; - inv->as_mv.col = src->as_mv.col * -1; - vp8_clamp_mv2(inv, xd); - vp8_clamp_mv2(src, xd); -} - - -int vp8_find_near_mvs_bias -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv mode_mv_sb[2][MB_MODE_COUNT], - int_mv best_mv_sb[2], - int cnt[4], - int refframe, - int *ref_frame_sign_bias -) -{ - int sign_bias = ref_frame_sign_bias[refframe]; - - vp8_find_near_mvs(xd, - here, - &mode_mv_sb[sign_bias][NEARESTMV], - &mode_mv_sb[sign_bias][NEARMV], - &best_mv_sb[sign_bias], - cnt, - refframe, - ref_frame_sign_bias); - - invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV], - &mode_mv_sb[sign_bias][NEARESTMV], xd); - invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV], - &mode_mv_sb[sign_bias][NEARMV], xd); - invert_and_clamp_mvs(&best_mv_sb[!sign_bias], - &best_mv_sb[sign_bias], xd); - - return sign_bias; -} - - -vp8_prob *vp8_mv_ref_probs( - vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4] -) -{ - p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0]; - p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1]; - p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2]; - p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3]; - /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/ - return p; -} - diff --git a/thirdparty/libvpx/vp8/common/findnearmv.h b/thirdparty/libvpx/vp8/common/findnearmv.h deleted file mode 100644 index 472a7b5d8d..0000000000 --- a/thirdparty/libvpx/vp8/common/findnearmv.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_FINDNEARMV_H_ -#define VP8_COMMON_FINDNEARMV_H_ - -#include "./vpx_config.h" -#include "mv.h" -#include "blockd.h" -#include "modecont.h" -#include "treecoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe, - int_mv *mvp, const int *ref_frame_sign_bias) -{ - if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) - { - mvp->as_mv.row *= -1; - mvp->as_mv.col *= -1; - } -} - -#define LEFT_TOP_MARGIN (16 << 3) -#define RIGHT_BOTTOM_MARGIN (16 << 3) -static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) -{ - if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) - mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) - mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) - mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) - mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; -} - -static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, - int mb_to_right_edge, int mb_to_top_edge, - int mb_to_bottom_edge) -{ - mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ? - mb_to_left_edge : mv->as_mv.col; - mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ? - mb_to_right_edge : mv->as_mv.col; - mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ? - mb_to_top_edge : mv->as_mv.row; - mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ? - mb_to_bottom_edge : mv->as_mv.row; -} -static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) -{ - unsigned int need_to_clamp; - need_to_clamp = (mv->as_mv.col < mb_to_left_edge); - need_to_clamp |= (mv->as_mv.col > mb_to_right_edge); - need_to_clamp |= (mv->as_mv.row < mb_to_top_edge); - need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge); - return need_to_clamp; -} - -void vp8_find_near_mvs -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv *nearest, int_mv *nearby, int_mv *best, - int near_mv_ref_cts[4], - int refframe, - int *ref_frame_sign_bias -); - - -int vp8_find_near_mvs_bias -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv mode_mv_sb[2][MB_MODE_COUNT], - int_mv best_mv_sb[2], - int cnt[4], - int refframe, - int *ref_frame_sign_bias -); - - -vp8_prob *vp8_mv_ref_probs( - vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4] -); - -extern const unsigned char vp8_mbsplit_offset[4][16]; - - -static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) -{ - if (!(b & 3)) - { - /* On L edge, get from MB to left of us */ - --cur_mb; - - if(cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.mv.as_int; - b += 4; - } - - return (cur_mb->bmi + b - 1)->mv.as_int; -} - -static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b, - int mi_stride) -{ - if (!(b >> 2)) - { - /* On top edge, get from MB above us */ - cur_mb -= mi_stride; - - if(cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.mv.as_int; - b += 16; - } - - return (cur_mb->bmi + (b - 4))->mv.as_int; -} -static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) -{ - if (!(b & 3)) - { - /* On L edge, get from MB to left of us */ - --cur_mb; - switch (cur_mb->mbmi.mode) - { - case B_PRED: - return (cur_mb->bmi + b + 3)->as_mode; - case DC_PRED: - return B_DC_PRED; - case V_PRED: - return B_VE_PRED; - case H_PRED: - return B_HE_PRED; - case TM_PRED: - return B_TM_PRED; - default: - return B_DC_PRED; - } - } - - return (cur_mb->bmi + b - 1)->as_mode; -} - -static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, - int mi_stride) -{ - if (!(b >> 2)) - { - /* On top edge, get from MB above us */ - cur_mb -= mi_stride; - - switch (cur_mb->mbmi.mode) - { - case B_PRED: - return (cur_mb->bmi + b + 12)->as_mode; - case DC_PRED: - return B_DC_PRED; - case V_PRED: - return B_VE_PRED; - case H_PRED: - return B_HE_PRED; - case TM_PRED: - return B_TM_PRED; - default: - return B_DC_PRED; - } - } - - return (cur_mb->bmi + b - 4)->as_mode; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_FINDNEARMV_H_ diff --git a/thirdparty/libvpx/vp8/common/generic/systemdependent.c b/thirdparty/libvpx/vp8/common/generic/systemdependent.c deleted file mode 100644 index 6d5f302d7a..0000000000 --- a/thirdparty/libvpx/vp8/common/generic/systemdependent.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#if ARCH_ARM -#include "vpx_ports/arm.h" -#elif ARCH_X86 || ARCH_X86_64 -#include "vpx_ports/x86.h" -#endif -#include "vp8/common/onyxc_int.h" -#include "vp8/common/systemdependent.h" - -#if CONFIG_MULTITHREAD -#if HAVE_UNISTD_H && !defined(__OS2__) -#include -#elif defined(_WIN32) -#include -typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); -#elif defined(__OS2__) -#define INCL_DOS -#define INCL_DOSSPINLOCK -#include -#endif -#endif - -#if CONFIG_MULTITHREAD -static int get_cpu_count() -{ - int core_count = 16; - -#if HAVE_UNISTD_H && !defined(__OS2__) -#if defined(_SC_NPROCESSORS_ONLN) - core_count = sysconf(_SC_NPROCESSORS_ONLN); -#elif defined(_SC_NPROC_ONLN) - core_count = sysconf(_SC_NPROC_ONLN); -#endif -#elif defined(_WIN32) - { -#if _WIN32_WINNT >= 0x0501 - SYSTEM_INFO sysinfo; - GetNativeSystemInfo(&sysinfo); -#else - PGNSI pGNSI; - SYSTEM_INFO sysinfo; - - /* Call GetNativeSystemInfo if supported or - * GetSystemInfo otherwise. */ - - pGNSI = (PGNSI) GetProcAddress( - GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); - if (pGNSI != NULL) - pGNSI(&sysinfo); - else - GetSystemInfo(&sysinfo); -#endif - - core_count = sysinfo.dwNumberOfProcessors; - } -#elif defined(__OS2__) - { - ULONG proc_id; - ULONG status; - - core_count = 0; - for (proc_id = 1; ; proc_id++) - { - if (DosGetProcessorStatus(proc_id, &status)) - break; - - if (status == PROC_ONLINE) - core_count++; - } - } -#else - /* other platforms */ -#endif - - return core_count > 0 ? core_count : 1; -} -#endif - -void vp8_clear_system_state_c() {}; - -void vp8_machine_specific_config(VP8_COMMON *ctx) -{ -#if CONFIG_MULTITHREAD - ctx->processor_core_count = get_cpu_count(); -#else - (void)ctx; -#endif /* CONFIG_MULTITHREAD */ - -#if ARCH_ARM - ctx->cpu_caps = arm_cpu_caps(); -#elif ARCH_X86 || ARCH_X86_64 - ctx->cpu_caps = x86_simd_caps(); -#endif -} diff --git a/thirdparty/libvpx/vp8/common/header.h b/thirdparty/libvpx/vp8/common/header.h deleted file mode 100644 index e27bca16bd..0000000000 --- a/thirdparty/libvpx/vp8/common/header.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_HEADER_H_ -#define VP8_COMMON_HEADER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* 24 bits total */ -typedef struct -{ - unsigned int type: 1; - unsigned int version: 3; - unsigned int show_frame: 1; - - /* Allow 2^20 bytes = 8 megabits for first partition */ - - unsigned int first_partition_length_in_bytes: 19; - -#ifdef PACKET_TESTING - unsigned int frame_number; - unsigned int update_gold: 1; - unsigned int uses_gold: 1; - unsigned int update_last: 1; - unsigned int uses_last: 1; -#endif - -} VP8_HEADER; - -#ifdef PACKET_TESTING -#define VP8_HEADER_SIZE 8 -#else -#define VP8_HEADER_SIZE 3 -#endif - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_HEADER_H_ diff --git a/thirdparty/libvpx/vp8/common/idct_blk.c b/thirdparty/libvpx/vp8/common/idct_blk.c deleted file mode 100644 index 8aa7d9bf0f..0000000000 --- a/thirdparty/libvpx/vp8/common/idct_blk.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_dequant_idct_add_c(short *input, short *dq, - unsigned char *dest, int stride); -void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); - -void vp8_dequant_idct_add_y_block_c - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i, j; - - for (i = 0; i < 4; i++) - { - for (j = 0; j < 4; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dst, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - q += 16; - dst += 4; - } - - dst += 4*stride - 16; - } -} - -void vp8_dequant_idct_add_uv_block_c - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - int i, j; - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dstu, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - q += 16; - dstu += 4; - } - - dstu += 4*stride - 8; - } - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dstv, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - q += 16; - dstv += 4; - } - - dstv += 4*stride - 8; - } -} diff --git a/thirdparty/libvpx/vp8/common/idctllm.c b/thirdparty/libvpx/vp8/common/idctllm.c deleted file mode 100644 index f5403c5aaf..0000000000 --- a/thirdparty/libvpx/vp8/common/idctllm.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp8_rtcd.h" - -/**************************************************************************** - * Notes: - * - * This implementation makes use of 16 bit fixed point verio of two multiply - * constants: - * 1. sqrt(2) * cos (pi/8) - * 2. sqrt(2) * sin (pi/8) - * Becuase the first constant is bigger than 1, to maintain the same 16 bit - * fixed point precision as the second one, we use a trick of - * x * a = x + x*(a-1) - * so - * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). - **************************************************************************/ -static const int cospi8sqrt2minus1 = 20091; -static const int sinpi8sqrt2 = 35468; - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride) -{ - int i; - int r, c; - int a1, b1, c1, d1; - short output[16]; - short *ip = input; - short *op = output; - int temp1, temp2; - int shortpitch = 4; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[8]; - b1 = ip[0] - ip[8]; - - temp1 = (ip[4] * sinpi8sqrt2) >> 16; - temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); - c1 = temp1 - temp2; - - temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); - temp2 = (ip[12] * sinpi8sqrt2) >> 16; - d1 = temp1 + temp2; - - op[shortpitch*0] = a1 + d1; - op[shortpitch*3] = a1 - d1; - - op[shortpitch*1] = b1 + c1; - op[shortpitch*2] = b1 - c1; - - ip++; - op++; - } - - ip = output; - op = output; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[2]; - b1 = ip[0] - ip[2]; - - temp1 = (ip[1] * sinpi8sqrt2) >> 16; - temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); - c1 = temp1 - temp2; - - temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); - temp2 = (ip[3] * sinpi8sqrt2) >> 16; - d1 = temp1 + temp2; - - - op[0] = (a1 + d1 + 4) >> 3; - op[3] = (a1 - d1 + 4) >> 3; - - op[1] = (b1 + c1 + 4) >> 3; - op[2] = (b1 - c1 + 4) >> 3; - - ip += shortpitch; - op += shortpitch; - } - - ip = output; - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int a = ip[c] + pred_ptr[c] ; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a ; - } - ip += 4; - dst_ptr += dst_stride; - pred_ptr += pred_stride; - } -} - -void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride) -{ - int a1 = ((input_dc + 4) >> 3); - int r, c; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int a = a1 + pred_ptr[c] ; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a ; - } - - dst_ptr += dst_stride; - pred_ptr += pred_stride; - } - -} - -void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) -{ - short output[16]; - int i; - int a1, b1, c1, d1; - int a2, b2, c2, d2; - short *ip = input; - short *op = output; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; - - op[0] = a1 + b1; - op[4] = c1 + d1; - op[8] = a1 - b1; - op[12] = d1 - c1; - ip++; - op++; - } - - ip = output; - op = output; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - a2 = a1 + b1; - b2 = c1 + d1; - c2 = a1 - b1; - d2 = d1 - c1; - - op[0] = (a2 + 3) >> 3; - op[1] = (b2 + 3) >> 3; - op[2] = (c2 + 3) >> 3; - op[3] = (d2 + 3) >> 3; - - ip += 4; - op += 4; - } - - for(i = 0; i < 16; i++) - { - mb_dqcoeff[i * 16] = output[i]; - } -} - -void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) -{ - int i; - int a1; - - a1 = ((input[0] + 3) >> 3); - for(i = 0; i < 16; i++) - { - mb_dqcoeff[i * 16] = a1; - } -} diff --git a/thirdparty/libvpx/vp8/common/invtrans.h b/thirdparty/libvpx/vp8/common/invtrans.h deleted file mode 100644 index 9cfea8d513..0000000000 --- a/thirdparty/libvpx/vp8/common/invtrans.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_INVTRANS_H_ -#define VP8_COMMON_INVTRANS_H_ - -#include "./vpx_config.h" -#include "vp8_rtcd.h" -#include "blockd.h" -#include "onyxc_int.h" - -#if CONFIG_MULTITHREAD -#include "vpx_mem/vpx_mem.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -static void eob_adjust(char *eobs, short *diff) -{ - /* eob adjust.... the idct can only skip if both the dc and eob are zero */ - int js; - for(js = 0; js < 16; js++) - { - if((eobs[js] == 0) && (diff[0] != 0)) - eobs[js]++; - diff+=16; - } -} - -static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) -{ - short *DQC = xd->dequant_y1; - - if (xd->mode_info_context->mbmi.mode != SPLITMV) - { - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_short_inv_walsh4x4 - (&xd->block[24].dqcoeff[0], xd->qcoeff); - } - else - { - vp8_short_inv_walsh4x4_1 - (&xd->block[24].dqcoeff[0], xd->qcoeff); - } - eob_adjust(xd->eobs, xd->qcoeff); - - DQC = xd->dequant_y1_dc; - } - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); -} -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_INVTRANS_H_ diff --git a/thirdparty/libvpx/vp8/common/loopfilter.h b/thirdparty/libvpx/vp8/common/loopfilter.h deleted file mode 100644 index 20a6bd375b..0000000000 --- a/thirdparty/libvpx/vp8/common/loopfilter.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_LOOPFILTER_H_ -#define VP8_COMMON_LOOPFILTER_H_ - -#include "vpx_ports/mem.h" -#include "vpx_config.h" -#include "vp8_rtcd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_LOOP_FILTER 63 -/* fraction of total macroblock rows to be used in fast filter level picking */ -/* has to be > 2 */ -#define PARTIAL_FRAME_FRACTION 8 - -typedef enum -{ - NORMAL_LOOPFILTER = 0, - SIMPLE_LOOPFILTER = 1 -} LOOPFILTERTYPE; - -#if ARCH_ARM -#define SIMD_WIDTH 1 -#else -#define SIMD_WIDTH 16 -#endif - -/* Need to align this structure so when it is declared and - * passed it can be loaded into vector registers. - */ -typedef struct -{ - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]); - unsigned char lvl[4][4][4]; - unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1]; - unsigned char mode_lf_lut[10]; -} loop_filter_info_n; - -typedef struct loop_filter_info -{ - const unsigned char * mblim; - const unsigned char * blim; - const unsigned char * lim; - const unsigned char * hev_thr; -} loop_filter_info; - - -typedef void loop_filter_uvfunction -( - unsigned char *u, /* source pointer */ - int p, /* pitch */ - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - unsigned char *v -); - -/* assorted loopfilter functions which get used elsewhere */ -struct VP8Common; -struct macroblockd; -struct modeinfo; - -void vp8_loop_filter_init(struct VP8Common *cm); - -void vp8_loop_filter_frame_init(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - -void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, - int frame_type); - -void vp8_loop_filter_partial_frame(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - -void vp8_loop_filter_frame_yonly(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - -void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, - int sharpness_lvl); - -void vp8_loop_filter_row_normal(struct VP8Common *cm, - struct modeinfo *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr); - -void vp8_loop_filter_row_simple(struct VP8Common *cm, - struct modeinfo *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp8/common/loopfilter_filters.c b/thirdparty/libvpx/vp8/common/loopfilter_filters.c deleted file mode 100644 index 1d51696ff7..0000000000 --- a/thirdparty/libvpx/vp8/common/loopfilter_filters.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include -#include "loopfilter.h" -#include "onyxc_int.h" - -typedef unsigned char uc; - -static signed char vp8_signed_char_clamp(int t) -{ - t = (t < -128 ? -128 : t); - t = (t > 127 ? 127 : t); - return (signed char) t; -} - - -/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static signed char vp8_filter_mask(uc limit, uc blimit, - uc p3, uc p2, uc p1, uc p0, - uc q0, uc q1, uc q2, uc q3) -{ - signed char mask = 0; - mask |= (abs(p3 - p2) > limit); - mask |= (abs(p2 - p1) > limit); - mask |= (abs(p1 - p0) > limit); - mask |= (abs(q1 - q0) > limit); - mask |= (abs(q2 - q1) > limit); - mask |= (abs(q3 - q2) > limit); - mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit); - return mask - 1; -} - -/* is there high variance internal edge ( 11111111 yes, 00000000 no) */ -static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) -{ - signed char hev = 0; - hev |= (abs(p1 - p0) > thresh) * -1; - hev |= (abs(q1 - q0) > thresh) * -1; - return hev; -} - -static void vp8_filter(signed char mask, uc hev, uc *op1, - uc *op0, uc *oq0, uc *oq1) - -{ - signed char ps0, qs0; - signed char ps1, qs1; - signed char filter_value, Filter1, Filter2; - signed char u; - - ps1 = (signed char) * op1 ^ 0x80; - ps0 = (signed char) * op0 ^ 0x80; - qs0 = (signed char) * oq0 ^ 0x80; - qs1 = (signed char) * oq1 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter_value = vp8_signed_char_clamp(ps1 - qs1); - filter_value &= hev; - - /* inner taps */ - filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); - filter_value &= mask; - - /* save bottom 3 bits so that we round one side +4 and the other +3 - * if it equals 4 we'll set to adjust by -1 to account for the fact - * we'd round 3 the other way - */ - Filter1 = vp8_signed_char_clamp(filter_value + 4); - Filter2 = vp8_signed_char_clamp(filter_value + 3); - Filter1 >>= 3; - Filter2 >>= 3; - u = vp8_signed_char_clamp(qs0 - Filter1); - *oq0 = u ^ 0x80; - u = vp8_signed_char_clamp(ps0 + Filter2); - *op0 = u ^ 0x80; - filter_value = Filter1; - - /* outer tap adjustments */ - filter_value += 1; - filter_value >>= 1; - filter_value &= ~hev; - - u = vp8_signed_char_clamp(qs1 - filter_value); - *oq1 = u ^ 0x80; - u = vp8_signed_char_clamp(ps1 + filter_value); - *op1 = u ^ 0x80; - -} -void vp8_loop_filter_horizontal_edge_c -( - unsigned char *s, - int p, /* pitch */ - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - int hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. - */ - do - { - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4*p], s[-3*p], s[-2*p], s[-1*p], - s[0*p], s[1*p], s[2*p], s[3*p]); - - hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); - - vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p); - - ++s; - } - while (++i < count * 8); -} - -void vp8_loop_filter_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - int hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. - */ - do - { - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); - - hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); - - vp8_filter(mask, hev, s - 2, s - 1, s, s + 1); - - s += p; - } - while (++i < count * 8); -} - -static void vp8_mbfilter(signed char mask, uc hev, - uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2) -{ - signed char s, u; - signed char filter_value, Filter1, Filter2; - signed char ps2 = (signed char) * op2 ^ 0x80; - signed char ps1 = (signed char) * op1 ^ 0x80; - signed char ps0 = (signed char) * op0 ^ 0x80; - signed char qs0 = (signed char) * oq0 ^ 0x80; - signed char qs1 = (signed char) * oq1 ^ 0x80; - signed char qs2 = (signed char) * oq2 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter_value = vp8_signed_char_clamp(ps1 - qs1); - filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); - filter_value &= mask; - - Filter2 = filter_value; - Filter2 &= hev; - - /* save bottom 3 bits so that we round one side +4 and the other +3 */ - Filter1 = vp8_signed_char_clamp(Filter2 + 4); - Filter2 = vp8_signed_char_clamp(Filter2 + 3); - Filter1 >>= 3; - Filter2 >>= 3; - qs0 = vp8_signed_char_clamp(qs0 - Filter1); - ps0 = vp8_signed_char_clamp(ps0 + Filter2); - - - /* only apply wider filter if not high edge variance */ - filter_value &= ~hev; - Filter2 = filter_value; - - /* roughly 3/7th difference across boundary */ - u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7); - s = vp8_signed_char_clamp(qs0 - u); - *oq0 = s ^ 0x80; - s = vp8_signed_char_clamp(ps0 + u); - *op0 = s ^ 0x80; - - /* roughly 2/7th difference across boundary */ - u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7); - s = vp8_signed_char_clamp(qs1 - u); - *oq1 = s ^ 0x80; - s = vp8_signed_char_clamp(ps1 + u); - *op1 = s ^ 0x80; - - /* roughly 1/7th difference across boundary */ - u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); - s = vp8_signed_char_clamp(qs2 - u); - *oq2 = s ^ 0x80; - s = vp8_signed_char_clamp(ps2 + u); - *op2 = s ^ 0x80; -} - -void vp8_mbloop_filter_horizontal_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. - */ - do - { - - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4*p], s[-3*p], s[-2*p], s[-1*p], - s[0*p], s[1*p], s[2*p], s[3*p]); - - hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); - - vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p); - - ++s; - } - while (++i < count * 8); - -} - - -void vp8_mbloop_filter_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - do - { - - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); - - hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); - - vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2); - - s += p; - } - while (++i < count * 8); - -} - -/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) -{ -/* Why does this cause problems for win32? - * error C2143: syntax error : missing ';' before 'type' - * (void) limit; - */ - signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; - return mask; -} - -static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) -{ - signed char filter_value, Filter1, Filter2; - signed char p1 = (signed char) * op1 ^ 0x80; - signed char p0 = (signed char) * op0 ^ 0x80; - signed char q0 = (signed char) * oq0 ^ 0x80; - signed char q1 = (signed char) * oq1 ^ 0x80; - signed char u; - - filter_value = vp8_signed_char_clamp(p1 - q1); - filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0)); - filter_value &= mask; - - /* save bottom 3 bits so that we round one side +4 and the other +3 */ - Filter1 = vp8_signed_char_clamp(filter_value + 4); - Filter1 >>= 3; - u = vp8_signed_char_clamp(q0 - Filter1); - *oq0 = u ^ 0x80; - - Filter2 = vp8_signed_char_clamp(filter_value + 3); - Filter2 >>= 3; - u = vp8_signed_char_clamp(p0 + Filter2); - *op0 = u ^ 0x80; -} - -void vp8_loop_filter_simple_horizontal_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit -) -{ - signed char mask = 0; - int i = 0; - - do - { - mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); - vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p); - ++s; - } - while (++i < 16); -} - -void vp8_loop_filter_simple_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit -) -{ - signed char mask = 0; - int i = 0; - - do - { - mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]); - vp8_simple_filter(mask, s - 2, s - 1, s, s + 1); - s += p; - } - while (++i < 16); - -} - -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit); -} - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit); -} diff --git a/thirdparty/libvpx/vp8/common/mbpitch.c b/thirdparty/libvpx/vp8/common/mbpitch.c deleted file mode 100644 index 32e1b66409..0000000000 --- a/thirdparty/libvpx/vp8/common/mbpitch.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "blockd.h" - -void vp8_setup_block_dptrs(MACROBLOCKD *x) -{ - int r, c; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4; - } - } - - for (r = 0; r < 2; r++) - { - for (c = 0; c < 2; c++) - { - x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4; - - } - } - - for (r = 0; r < 2; r++) - { - for (c = 0; c < 2; c++) - { - x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4; - - } - } - - for (r = 0; r < 25; r++) - { - x->block[r].qcoeff = x->qcoeff + r * 16; - x->block[r].dqcoeff = x->dqcoeff + r * 16; - x->block[r].eob = x->eobs + r; - } -} - -void vp8_build_block_doffsets(MACROBLOCKD *x) -{ - int block; - - for (block = 0; block < 16; block++) /* y blocks */ - { - x->block[block].offset = - (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4; - } - - for (block = 16; block < 20; block++) /* U and V blocks */ - { - x->block[block+4].offset = - x->block[block].offset = - ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4; - } -} diff --git a/thirdparty/libvpx/vp8/common/modecont.c b/thirdparty/libvpx/vp8/common/modecont.c deleted file mode 100644 index 86a74bc0ff..0000000000 --- a/thirdparty/libvpx/vp8/common/modecont.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "entropy.h" - -const int vp8_mode_contexts[6][4] = -{ - { - /* 0 */ - 7, 1, 1, 143, - }, - { - /* 1 */ - 14, 18, 14, 107, - }, - { - /* 2 */ - 135, 64, 57, 68, - }, - { - /* 3 */ - 60, 56, 128, 65, - }, - { - /* 4 */ - 159, 134, 128, 34, - }, - { - /* 5 */ - 234, 188, 128, 28, - }, -}; diff --git a/thirdparty/libvpx/vp8/common/modecont.h b/thirdparty/libvpx/vp8/common/modecont.h deleted file mode 100644 index ff34c33c55..0000000000 --- a/thirdparty/libvpx/vp8/common/modecont.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_MODECONT_H_ -#define VP8_COMMON_MODECONT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -extern const int vp8_mode_contexts[6][4]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_MODECONT_H_ diff --git a/thirdparty/libvpx/vp8/common/mv.h b/thirdparty/libvpx/vp8/common/mv.h deleted file mode 100644 index 111ccd63c7..0000000000 --- a/thirdparty/libvpx/vp8/common/mv.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_MV_H_ -#define VP8_COMMON_MV_H_ -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - short row; - short col; -} MV; - -typedef union int_mv -{ - uint32_t as_int; - MV as_mv; -} int_mv; /* facilitates faster equality tests and copies */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_MV_H_ diff --git a/thirdparty/libvpx/vp8/common/onyxc_int.h b/thirdparty/libvpx/vp8/common/onyxc_int.h deleted file mode 100644 index 6d89865c60..0000000000 --- a/thirdparty/libvpx/vp8/common/onyxc_int.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ONYXC_INT_H_ -#define VP8_COMMON_ONYXC_INT_H_ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "loopfilter.h" -#include "entropymv.h" -#include "entropy.h" -#if CONFIG_POSTPROC -#include "postproc.h" -#endif - -/*#ifdef PACKET_TESTING*/ -#include "header.h" -/*#endif*/ - -#ifdef __cplusplus -extern "C" { -#endif - -#define MINQ 0 -#define MAXQ 127 -#define QINDEX_RANGE (MAXQ + 1) - -#define NUM_YV12_BUFFERS 4 - -#define MAX_PARTITIONS 9 - -typedef struct frame_contexts -{ - vp8_prob bmode_prob [VP8_BINTRAMODES-1]; - vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */ - vp8_prob uv_mode_prob [VP8_UV_MODES-1]; - vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; - vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - MV_CONTEXT mvc[2]; -} FRAME_CONTEXT; - -typedef enum -{ - ONE_PARTITION = 0, - TWO_PARTITION = 1, - FOUR_PARTITION = 2, - EIGHT_PARTITION = 3 -} TOKEN_PARTITION; - -typedef enum -{ - RECON_CLAMP_REQUIRED = 0, - RECON_CLAMP_NOTREQUIRED = 1 -} CLAMP_TYPE; - -typedef struct VP8Common - -{ - struct vpx_internal_error_info error; - - DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]); - DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]); - DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]); - - int Width; - int Height; - int horiz_scale; - int vert_scale; - - CLAMP_TYPE clamp_type; - - YV12_BUFFER_CONFIG *frame_to_show; - - YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS]; - int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; - int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx; - - YV12_BUFFER_CONFIG temp_scale_frame; - -#if CONFIG_POSTPROC - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; - int post_proc_buffer_int_used; - unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ -#endif - - FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */ - FRAME_TYPE frame_type; - - int show_frame; - - int frame_flags; - int MBs; - int mb_rows; - int mb_cols; - int mode_info_stride; - - /* profile settings */ - int mb_no_coeff_skip; - int no_lpf; - int use_bilinear_mc_filter; - int full_pixel; - - int base_qindex; - - int y1dc_delta_q; - int y2dc_delta_q; - int y2ac_delta_q; - int uvdc_delta_q; - int uvac_delta_q; - - /* We allocate a MODE_INFO struct for each macroblock, together with - an extra row on top and column on the left to simplify prediction. */ - - MODE_INFO *mip; /* Base of allocated array */ - MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ -#if CONFIG_ERROR_CONCEALMENT - MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ - MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ -#endif - MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame - to show */ - LOOPFILTERTYPE filter_type; - - loop_filter_info_n lf_info; - - int filter_level; - int last_sharpness_level; - int sharpness_level; - - int refresh_last_frame; /* Two state 0 = NO, 1 = YES */ - int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */ - int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */ - - int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */ - int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */ - - int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */ - - int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ - - /* Y,U,V,Y2 */ - ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ - - FRAME_CONTEXT lfc; /* last frame entropy */ - FRAME_CONTEXT fc; /* this frame entropy */ - - unsigned int current_video_frame; - - int version; - - TOKEN_PARTITION multi_token_partition; - -#ifdef PACKET_TESTING - VP8_HEADER oh; -#endif -#if CONFIG_POSTPROC_VISUALIZER - double bitrate; - double framerate; -#endif - -#if CONFIG_MULTITHREAD - int processor_core_count; -#endif -#if CONFIG_POSTPROC - struct postproc_state postproc_state; -#endif - int cpu_caps; -} VP8_COMMON; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ONYXC_INT_H_ diff --git a/thirdparty/libvpx/vp8/common/onyxd.h b/thirdparty/libvpx/vp8/common/onyxd.h deleted file mode 100644 index e37b29f32c..0000000000 --- a/thirdparty/libvpx/vp8/common/onyxd.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ONYXD_H_ -#define VP8_COMMON_ONYXD_H_ - - -/* Create/destroy static data structures. */ -#ifdef __cplusplus -extern "C" -{ -#endif -#include "vpx_scale/yv12config.h" -#include "ppflags.h" -#include "vpx_ports/mem.h" -#include "vpx/vpx_codec.h" -#include "vpx/vp8.h" - - struct VP8D_COMP; - - typedef struct - { - int Width; - int Height; - int Version; - int postprocess; - int max_threads; - int error_concealment; - } VP8D_CONFIG; - - typedef enum - { - VP8D_OK = 0 - } VP8D_SETTING; - - void vp8dx_initialize(void); - - void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x); - - int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); - - int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, - size_t size, const uint8_t *dest, - int64_t time_stamp); - int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); - - vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - -#ifdef __cplusplus -} -#endif - - -#endif // VP8_COMMON_ONYXD_H_ diff --git a/thirdparty/libvpx/vp8/common/ppflags.h b/thirdparty/libvpx/vp8/common/ppflags.h deleted file mode 100644 index 768224aad5..0000000000 --- a/thirdparty/libvpx/vp8/common/ppflags.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_PPFLAGS_H_ -#define VP8_COMMON_PPFLAGS_H_ - -#ifdef __cplusplus -extern "C" { -#endif -enum -{ - VP8D_NOFILTERING = 0, - VP8D_DEBLOCK = 1<<0, - VP8D_DEMACROBLOCK = 1<<1, - VP8D_ADDNOISE = 1<<2, - VP8D_DEBUG_TXT_FRAME_INFO = 1<<3, - VP8D_DEBUG_TXT_MBLK_MODES = 1<<4, - VP8D_DEBUG_TXT_DC_DIFF = 1<<5, - VP8D_DEBUG_TXT_RATE_INFO = 1<<6, - VP8D_DEBUG_DRAW_MV = 1<<7, - VP8D_DEBUG_CLR_BLK_MODES = 1<<8, - VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9, - VP8D_MFQE = 1<<10 -}; - -typedef struct -{ - int post_proc_flag; - int deblocking_level; - int noise_level; - int display_ref_frame_flag; - int display_mb_modes_flag; - int display_b_modes_flag; - int display_mv_flag; -} vp8_ppflags_t; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_PPFLAGS_H_ diff --git a/thirdparty/libvpx/vp8/common/quant_common.c b/thirdparty/libvpx/vp8/common/quant_common.c deleted file mode 100644 index 05f9210702..0000000000 --- a/thirdparty/libvpx/vp8/common/quant_common.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "quant_common.h" - -static const int dc_qlookup[QINDEX_RANGE] = -{ - 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, - 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, - 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, -}; - -static const int ac_qlookup[QINDEX_RANGE] = -{ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, - 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, - 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, - 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, - 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, -}; - - -int vp8_dc_quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = dc_qlookup[ QIndex ]; - return retval; -} - -int vp8_dc2quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = dc_qlookup[ QIndex ] * 2; - return retval; - -} -int vp8_dc_uv_quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = dc_qlookup[ QIndex ]; - - if (retval > 132) - retval = 132; - - return retval; -} - -int vp8_ac_yquant(int QIndex) -{ - int retval; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = ac_qlookup[ QIndex ]; - return retval; -} - -int vp8_ac2quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. - * The smallest precision for that is '(x*6349) >> 12' but 16 is a good - * word size. */ - retval = (ac_qlookup[ QIndex ] * 101581) >> 16; - - if (retval < 8) - retval = 8; - - return retval; -} -int vp8_ac_uv_quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = ac_qlookup[ QIndex ]; - return retval; -} diff --git a/thirdparty/libvpx/vp8/common/quant_common.h b/thirdparty/libvpx/vp8/common/quant_common.h deleted file mode 100644 index 700b5e6d72..0000000000 --- a/thirdparty/libvpx/vp8/common/quant_common.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_QUANT_COMMON_H_ -#define VP8_COMMON_QUANT_COMMON_H_ - - -#include "string.h" -#include "blockd.h" -#include "onyxc_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern int vp8_ac_yquant(int QIndex); -extern int vp8_dc_quant(int QIndex, int Delta); -extern int vp8_dc2quant(int QIndex, int Delta); -extern int vp8_ac2quant(int QIndex, int Delta); -extern int vp8_dc_uv_quant(int QIndex, int Delta); -extern int vp8_ac_uv_quant(int QIndex, int Delta); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/reconinter.c b/thirdparty/libvpx/vp8/common/reconinter.c deleted file mode 100644 index e302595587..0000000000 --- a/thirdparty/libvpx/vp8/common/reconinter.c +++ /dev/null @@ -1,544 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include -#include - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx/vpx_integer.h" -#include "blockd.h" -#include "reconinter.h" -#if CONFIG_RUNTIME_CPU_DETECT -#include "onyxc_int.h" -#endif - -void vp8_copy_mem16x16_c( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) -{ - - int r; - - for (r = 0; r < 16; r++) - { - memcpy(dst, src, 16); - - src += src_stride; - dst += dst_stride; - - } - -} - -void vp8_copy_mem8x8_c( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) -{ - int r; - - for (r = 0; r < 8; r++) - { - memcpy(dst, src, 8); - - src += src_stride; - dst += dst_stride; - - } - -} - -void vp8_copy_mem8x4_c( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) -{ - int r; - - for (r = 0; r < 4; r++) - { - memcpy(dst, src, 8); - - src += src_stride; - dst += dst_stride; - - } - -} - - -void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) -{ - int r; - unsigned char *pred_ptr = d->predictor; - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); - } - else - { - for (r = 0; r < 4; r++) - { - pred_ptr[0] = ptr[0]; - pred_ptr[1] = ptr[1]; - pred_ptr[2] = ptr[2]; - pred_ptr[3] = ptr[3]; - pred_ptr += pitch; - ptr += pre_stride; - } - } -} - -static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) -{ - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); - } - else - { - vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride); - } -} - -static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) -{ - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); - } - else - { - vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride); - } -} - -static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) -{ - int r; - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); - } - else - { - for (r = 0; r < 4; r++) - { - dst[0] = ptr[0]; - dst[1] = ptr[1]; - dst[2] = ptr[2]; - dst[3] = ptr[3]; - dst += dst_stride; - ptr += pre_stride; - } - } -} - - -/*encoder only*/ -void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) -{ - unsigned char *uptr, *vptr; - unsigned char *upred_ptr = &x->predictor[256]; - unsigned char *vpred_ptr = &x->predictor[320]; - - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int offset; - int pre_stride = x->pre.uv_stride; - - /* calc uv motion vectors */ - mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1)); - mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1)); - mv_row /= 2; - mv_col /= 2; - mv_row &= x->fullpixel_mask; - mv_col &= x->fullpixel_mask; - - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = x->pre.u_buffer + offset; - vptr = x->pre.v_buffer + offset; - - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8); - x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8); - } - else - { - vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8); - vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8); - } -} - -/*encoder only*/ -void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) -{ - int i, j; - int pre_stride = x->pre.uv_stride; - unsigned char *base_pre; - - /* build uv mvs */ - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - int yoffset = i * 8 + j * 2; - int uoffset = 16 + i * 2 + j; - int voffset = 20 + i * 2 + j; - - int temp; - - temp = x->block[yoffset ].bmi.mv.as_mv.row - + x->block[yoffset+1].bmi.mv.as_mv.row - + x->block[yoffset+4].bmi.mv.as_mv.row - + x->block[yoffset+5].bmi.mv.as_mv.row; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; - - temp = x->block[yoffset ].bmi.mv.as_mv.col - + x->block[yoffset+1].bmi.mv.as_mv.col - + x->block[yoffset+4].bmi.mv.as_mv.col - + x->block[yoffset+5].bmi.mv.as_mv.col; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; - - x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; - } - } - - base_pre = x->pre.u_buffer; - for (i = 16; i < 20; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); - else - { - vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); - vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); - } - } - - base_pre = x->pre.v_buffer; - for (i = 20; i < 24; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); - else - { - vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); - vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); - } - } -} - - -/*encoder only*/ -void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, - unsigned char *dst_y, - int dst_ystride) -{ - unsigned char *ptr_base; - unsigned char *ptr; - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int pre_stride = x->pre.y_stride; - - ptr_base = x->pre.y_buffer; - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, - dst_y, dst_ystride); - } - else - { - vp8_copy_mem16x16(ptr, pre_stride, dst_y, - dst_ystride); - } -} - -static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) -{ - /* If the MV points so far into the UMV border that no visible pixels - * are used for reconstruction, the subpel part of the MV can be - * discarded and the MV limited to 16 pixels with equivalent results. - * - * This limit kicks in at 19 pixels for the top and left edges, for - * the 16 pixels plus 3 taps right of the central pixel when subpel - * filtering. The bottom and right edges use 16 pixels plus 2 pixels - * left of the central pixel when filtering. - */ - if (mv->col < (xd->mb_to_left_edge - (19 << 3))) - mv->col = xd->mb_to_left_edge - (16 << 3); - else if (mv->col > xd->mb_to_right_edge + (18 << 3)) - mv->col = xd->mb_to_right_edge + (16 << 3); - - if (mv->row < (xd->mb_to_top_edge - (19 << 3))) - mv->row = xd->mb_to_top_edge - (16 << 3); - else if (mv->row > xd->mb_to_bottom_edge + (18 << 3)) - mv->row = xd->mb_to_bottom_edge + (16 << 3); -} - -/* A version of the above function for chroma block MVs.*/ -static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) -{ - mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? - (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; - mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? - (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; - - mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? - (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; - mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? - (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; -} - -void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, - int dst_ystride, - int dst_uvstride) -{ - int offset; - unsigned char *ptr; - unsigned char *uptr, *vptr; - - int_mv _16x16mv; - - unsigned char *ptr_base = x->pre.y_buffer; - int pre_stride = x->pre.y_stride; - - _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int; - - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - { - clamp_mv_to_umv_border(&_16x16mv.as_mv, x); - } - - ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); - - if ( _16x16mv.as_int & 0x00070007) - { - x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride); - } - else - { - vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride); - } - - /* calc uv motion vectors */ - _16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1)); - _16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1)); - _16x16mv.as_mv.row /= 2; - _16x16mv.as_mv.col /= 2; - _16x16mv.as_mv.row &= x->fullpixel_mask; - _16x16mv.as_mv.col &= x->fullpixel_mask; - - pre_stride >>= 1; - offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); - uptr = x->pre.u_buffer + offset; - vptr = x->pre.v_buffer + offset; - - if ( _16x16mv.as_int & 0x00070007) - { - x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride); - x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride); - } - else - { - vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride); - vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride); - } -} - -static void build_inter4x4_predictors_mb(MACROBLOCKD *x) -{ - int i; - unsigned char *base_dst = x->dst.y_buffer; - unsigned char *base_pre = x->pre.y_buffer; - - if (x->mode_info_context->mbmi.partitioning < 3) - { - BLOCKD *b; - int dst_stride = x->dst.y_stride; - - x->block[ 0].bmi = x->mode_info_context->bmi[ 0]; - x->block[ 2].bmi = x->mode_info_context->bmi[ 2]; - x->block[ 8].bmi = x->mode_info_context->bmi[ 8]; - x->block[10].bmi = x->mode_info_context->bmi[10]; - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - { - clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x); - } - - b = &x->block[ 0]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - b = &x->block[ 2]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - b = &x->block[ 8]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - b = &x->block[10]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - } - else - { - for (i = 0; i < 16; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - int dst_stride = x->dst.y_stride; - - x->block[i+0].bmi = x->mode_info_context->bmi[i+0]; - x->block[i+1].bmi = x->mode_info_context->bmi[i+1]; - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - { - clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x); - } - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); - else - { - build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - } - - } - - } - base_dst = x->dst.u_buffer; - base_pre = x->pre.u_buffer; - for (i = 16; i < 20; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - int dst_stride = x->dst.uv_stride; - - /* Note: uv mvs already clamped in build_4x4uvmvs() */ - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); - else - { - build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - } - } - - base_dst = x->dst.v_buffer; - base_pre = x->pre.v_buffer; - for (i = 20; i < 24; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - int dst_stride = x->dst.uv_stride; - - /* Note: uv mvs already clamped in build_4x4uvmvs() */ - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); - else - { - build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - } - } -} - -static -void build_4x4uvmvs(MACROBLOCKD *x) -{ - int i, j; - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - int yoffset = i * 8 + j * 2; - int uoffset = 16 + i * 2 + j; - int voffset = 20 + i * 2 + j; - - int temp; - - temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row - + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row - + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row - + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; - - temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col - + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col - + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col - + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; - - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x); - - x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; - } - } -} - -void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) -{ - if (xd->mode_info_context->mbmi.mode != SPLITMV) - { - vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); - } - else - { - build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd); - } -} diff --git a/thirdparty/libvpx/vp8/common/reconinter.h b/thirdparty/libvpx/vp8/common/reconinter.h deleted file mode 100644 index ba979b9664..0000000000 --- a/thirdparty/libvpx/vp8/common/reconinter.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_RECONINTER_H_ -#define VP8_COMMON_RECONINTER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x); -extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, - int dst_ystride, - int dst_uvstride); - - -extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, - unsigned char *dst_y, - int dst_ystride); -extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, - unsigned char *base_pre, - int pre_stride, - vp8_subpix_fn_t sppf); - -extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); -extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp8/common/reconintra.c b/thirdparty/libvpx/vp8/common/reconintra.c deleted file mode 100644 index 356655dac7..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "./vp8_rtcd.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/vpx_once.h" -#include "blockd.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/reconintra4x4.h" - -enum { - SIZE_16, - SIZE_8, - NUM_SIZES, -}; - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[4][NUM_SIZES]; -static intra_pred_fn dc_pred[2][2][NUM_SIZES]; - -static void vp8_init_intra_predictors_internal(void) -{ -#define INIT_SIZE(sz) \ - pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \ - pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \ - pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \ - \ - dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \ - dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \ - dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \ - dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz - - INIT_SIZE(16); - INIT_SIZE(8); - vp8_init_intra4x4_predictors_internal(); -} - -void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) -{ - MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode; - DECLARE_ALIGNED(16, uint8_t, yleft_col[16]); - int i; - intra_pred_fn fn; - - for (i = 0; i < 16; i++) - { - yleft_col[i] = yleft[i* left_stride]; - } - - if (mode == DC_PRED) - { - fn = dc_pred[x->left_available][x->up_available][SIZE_16]; - } - else - { - fn = pred[mode][SIZE_16]; - } - - fn(ypred_ptr, y_stride, yabove_row, yleft_col); -} - -void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) -{ - MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode; - unsigned char uleft_col[8]; - unsigned char vleft_col[8]; - int i; - intra_pred_fn fn; - - for (i = 0; i < 8; i++) - { - uleft_col[i] = uleft[i * left_stride]; - vleft_col[i] = vleft[i * left_stride]; - } - - if (uvmode == DC_PRED) - { - fn = dc_pred[x->left_available][x->up_available][SIZE_8]; - } - else - { - fn = pred[uvmode][SIZE_8]; - } - - fn(upred_ptr, pred_stride, uabove_row, uleft_col); - fn(vpred_ptr, pred_stride, vabove_row, vleft_col); -} - -void vp8_init_intra_predictors(void) -{ - once(vp8_init_intra_predictors_internal); -} diff --git a/thirdparty/libvpx/vp8/common/reconintra.h b/thirdparty/libvpx/vp8/common/reconintra.h deleted file mode 100644 index b6225a6637..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_RECONINTRA_H_ -#define VP8_COMMON_RECONINTRA_H_ - -#include "vp8/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, - unsigned char *yabove_row, - unsigned char *yleft, - int left_stride, - unsigned char *ypred_ptr, - int y_stride); - -void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride); - -void vp8_init_intra_predictors(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_RECONINTRA_H_ diff --git a/thirdparty/libvpx/vp8/common/reconintra4x4.c b/thirdparty/libvpx/vp8/common/reconintra4x4.c deleted file mode 100644 index 35ad891eff..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra4x4.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vp8_rtcd.h" -#include "blockd.h" - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[10]; - -void vp8_init_intra4x4_predictors_internal(void) -{ - pred[B_DC_PRED] = vpx_dc_predictor_4x4; - pred[B_TM_PRED] = vpx_tm_predictor_4x4; - pred[B_VE_PRED] = vpx_ve_predictor_4x4; - pred[B_HE_PRED] = vpx_he_predictor_4x4; - pred[B_LD_PRED] = vpx_d45e_predictor_4x4; - pred[B_RD_PRED] = vpx_d135_predictor_4x4; - pred[B_VR_PRED] = vpx_d117_predictor_4x4; - pred[B_VL_PRED] = vpx_d63f_predictor_4x4; - pred[B_HD_PRED] = vpx_d153_predictor_4x4; - pred[B_HU_PRED] = vpx_d207_predictor_4x4; -} - -void vp8_intra4x4_predict(unsigned char *above, - unsigned char *yleft, int left_stride, - B_PREDICTION_MODE b_mode, - unsigned char *dst, int dst_stride, - unsigned char top_left) -{ - unsigned char Left[4]; - unsigned char Aboveb[12], *Above = Aboveb + 4; - - Left[0] = yleft[0]; - Left[1] = yleft[left_stride]; - Left[2] = yleft[2 * left_stride]; - Left[3] = yleft[3 * left_stride]; - memcpy(Above, above, 8); - Above[-1] = top_left; - - pred[b_mode](dst, dst_stride, Above, Left); -} diff --git a/thirdparty/libvpx/vp8/common/reconintra4x4.h b/thirdparty/libvpx/vp8/common/reconintra4x4.h deleted file mode 100644 index 5dc5d13a5c..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra4x4.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_RECONINTRA4X4_H_ -#define VP8_COMMON_RECONINTRA4X4_H_ -#include "vp8/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd, - unsigned char *above_right_src) -{ - int dst_stride = xd->dst.y_stride; - unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16; - - unsigned int *src_ptr = (unsigned int *)above_right_src; - unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride); - unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride); - unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride); - - *dst_ptr0 = *src_ptr; - *dst_ptr1 = *src_ptr; - *dst_ptr2 = *src_ptr; -} - -void vp8_intra4x4_predict(unsigned char *Above, - unsigned char *yleft, int left_stride, - B_PREDICTION_MODE b_mode, - unsigned char *dst, int dst_stride, - unsigned char top_left); - -void vp8_init_intra4x4_predictors_internal(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_RECONINTRA4X4_H_ diff --git a/thirdparty/libvpx/vp8/common/rtcd.c b/thirdparty/libvpx/vp8/common/rtcd.c deleted file mode 100644 index ab0e9b47fe..0000000000 --- a/thirdparty/libvpx/vp8/common/rtcd.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vp8_rtcd.h" -#include "vpx_ports/vpx_once.h" - - -void vp8_rtcd() -{ - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vp8/common/setupintrarecon.c b/thirdparty/libvpx/vp8/common/setupintrarecon.c deleted file mode 100644 index 669564db42..0000000000 --- a/thirdparty/libvpx/vp8/common/setupintrarecon.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "setupintrarecon.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) -{ - int i; - - /* set up frame new frame for intra coded blocks */ - memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); - for (i = 0; i < ybf->y_height; i++) - ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129; - - memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - for (i = 0; i < ybf->uv_height; i++) - ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - - memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - for (i = 0; i < ybf->uv_height; i++) - ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - -} - -void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) -{ - memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); - memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); -} diff --git a/thirdparty/libvpx/vp8/common/setupintrarecon.h b/thirdparty/libvpx/vp8/common/setupintrarecon.h deleted file mode 100644 index 1857c4e26a..0000000000 --- a/thirdparty/libvpx/vp8/common/setupintrarecon.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_SETUPINTRARECON_H_ -#define VP8_COMMON_SETUPINTRARECON_H_ - -#include "./vpx_config.h" -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif -extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); -extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); - -static INLINE void setup_intra_recon_left(unsigned char *y_buffer, - unsigned char *u_buffer, - unsigned char *v_buffer, - int y_stride, - int uv_stride) -{ - int i; - - for (i = 0; i < 16; i++) - y_buffer[y_stride *i] = (unsigned char) 129; - - for (i = 0; i < 8; i++) - u_buffer[uv_stride *i] = (unsigned char) 129; - - for (i = 0; i < 8; i++) - v_buffer[uv_stride *i] = (unsigned char) 129; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_SETUPINTRARECON_H_ diff --git a/thirdparty/libvpx/vp8/common/swapyv12buffer.c b/thirdparty/libvpx/vp8/common/swapyv12buffer.c deleted file mode 100644 index 73656b3d72..0000000000 --- a/thirdparty/libvpx/vp8/common/swapyv12buffer.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "swapyv12buffer.h" - -void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame) -{ - unsigned char *temp; - - temp = last_frame->buffer_alloc; - last_frame->buffer_alloc = new_frame->buffer_alloc; - new_frame->buffer_alloc = temp; - - temp = last_frame->y_buffer; - last_frame->y_buffer = new_frame->y_buffer; - new_frame->y_buffer = temp; - - temp = last_frame->u_buffer; - last_frame->u_buffer = new_frame->u_buffer; - new_frame->u_buffer = temp; - - temp = last_frame->v_buffer; - last_frame->v_buffer = new_frame->v_buffer; - new_frame->v_buffer = temp; - -} diff --git a/thirdparty/libvpx/vp8/common/swapyv12buffer.h b/thirdparty/libvpx/vp8/common/swapyv12buffer.h deleted file mode 100644 index 1d66cd3d62..0000000000 --- a/thirdparty/libvpx/vp8/common/swapyv12buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_SWAPYV12BUFFER_H_ -#define VP8_COMMON_SWAPYV12BUFFER_H_ - -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_SWAPYV12BUFFER_H_ diff --git a/thirdparty/libvpx/vp8/common/systemdependent.h b/thirdparty/libvpx/vp8/common/systemdependent.h deleted file mode 100644 index 3d44e37cf2..0000000000 --- a/thirdparty/libvpx/vp8/common/systemdependent.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_SYSTEMDEPENDENT_H_ -#define VP8_COMMON_SYSTEMDEPENDENT_H_ - -#include "vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP8Common; -void vp8_machine_specific_config(struct VP8Common *); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_SYSTEMDEPENDENT_H_ diff --git a/thirdparty/libvpx/vp8/common/threading.h b/thirdparty/libvpx/vp8/common/threading.h deleted file mode 100644 index 183b49b8ff..0000000000 --- a/thirdparty/libvpx/vp8/common/threading.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_THREADING_H_ -#define VP8_COMMON_THREADING_H_ - -#include "./vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD - -/* Thread management macros */ -#if defined(_WIN32) && !HAVE_PTHREAD_H -/* Win32 */ -#include -#include -#define THREAD_FUNCTION unsigned int __stdcall -#define THREAD_FUNCTION_RETURN DWORD -#define THREAD_SPECIFIC_INDEX DWORD -#define pthread_t HANDLE -#define pthread_attr_t DWORD -#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) -#define thread_sleep(nms) Sleep(nms) -#define pthread_cancel(thread) terminate_thread(thread,0) -#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();}; -#define pthread_getspecific(ts_key) TlsGetValue(ts_key) -#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value) -#define pthread_self() GetCurrentThreadId() - -#elif defined(__OS2__) -/* OS/2 */ -#define INCL_DOS -#include - -#include -#define THREAD_FUNCTION void * -#define THREAD_FUNCTION_RETURN void * -#define THREAD_SPECIFIC_INDEX PULONG -#define pthread_t TID -#define pthread_attr_t ULONG -#define pthread_detach(thread) 0 -#define thread_sleep(nms) DosSleep(nms) -#define pthread_cancel(thread) DosKillThread(thread) -#define ts_key_create(ts_key, destructor) \ - DosAllocThreadLocalMemory(1, &(ts_key)); -#define pthread_getspecific(ts_key) ((void *)(*(ts_key))) -#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value)) -#define pthread_self() _gettid() -#else -#ifdef __APPLE__ -#include -#include -#include -#include -#include - -#else -#include -#endif - -#include -/* pthreads */ -/* Nearly everything is already defined */ -#define THREAD_FUNCTION void * -#define THREAD_FUNCTION_RETURN void * -#define THREAD_SPECIFIC_INDEX pthread_key_t -#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor); -#endif - -/* Synchronization macros: Win32 and Pthreads */ -#if defined(_WIN32) && !HAVE_PTHREAD_H -#define sem_t HANDLE -#define pause(voidpara) __asm PAUSE -#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL) -#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE)) -#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL) -#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE) -#define thread_sleep(nms) Sleep(nms) - -#elif defined(__OS2__) -typedef struct -{ - HEV event; - HMTX wait_mutex; - HMTX count_mutex; - int count; -} sem_t; - -static inline int sem_init(sem_t *sem, int pshared, unsigned int value) -{ - DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0, - value > 0 ? TRUE : FALSE); - DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE); - DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE); - - sem->count = value; - - return 0; -} - -static inline int sem_wait(sem_t * sem) -{ - DosRequestMutexSem(sem->wait_mutex, -1); - - DosWaitEventSem(sem->event, -1); - - DosRequestMutexSem(sem->count_mutex, -1); - - sem->count--; - if (sem->count == 0) - { - ULONG post_count; - - DosResetEventSem(sem->event, &post_count); - } - - DosReleaseMutexSem(sem->count_mutex); - - DosReleaseMutexSem(sem->wait_mutex); - - return 0; -} - -static inline int sem_post(sem_t * sem) -{ - DosRequestMutexSem(sem->count_mutex, -1); - - if (sem->count < 32768) - { - sem->count++; - DosPostEventSem(sem->event); - } - - DosReleaseMutexSem(sem->count_mutex); - - return 0; -} - -static inline int sem_destroy(sem_t * sem) -{ - DosCloseEventSem(sem->event); - DosCloseMutexSem(sem->wait_mutex); - DosCloseMutexSem(sem->count_mutex); - - return 0; -} - -#define thread_sleep(nms) DosSleep(nms) - -#else - -#ifdef __APPLE__ -#define sem_t semaphore_t -#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z) -#define sem_wait(sem) (semaphore_wait(*sem) ) -#define sem_post(sem) semaphore_signal(*sem) -#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem) -#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ -#else -#include -#include -#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ -#endif -/* Not Windows. Assume pthreads */ - -#endif - -#if ARCH_X86 || ARCH_X86_64 -#include "vpx_ports/x86.h" -#else -#define x86_pause_hint() -#endif - -#include "vpx_util/vpx_thread.h" - -static INLINE void mutex_lock(pthread_mutex_t *const mutex) { - const int kMaxTryLocks = 4000; - int locked = 0; - int i; - - for (i = 0; i < kMaxTryLocks; ++i) { - if (!pthread_mutex_trylock(mutex)) { - locked = 1; - break; - } - } - - if (!locked) - pthread_mutex_lock(mutex); -} - -static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) { - int ret; - mutex_lock(mutex); - ret = *p; - pthread_mutex_unlock(mutex); - return ret; -} - -static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col, - const int *last_row_current_mb_col, - const int nsync) { - while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) { - x86_pause_hint(); - thread_sleep(0); - } -} - -static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) { - mutex_lock(mutex); - *p = v; - pthread_mutex_unlock(mutex); -} - -#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_THREADING_H_ diff --git a/thirdparty/libvpx/vp8/common/treecoder.c b/thirdparty/libvpx/vp8/common/treecoder.c deleted file mode 100644 index d80c64bdfa..0000000000 --- a/thirdparty/libvpx/vp8/common/treecoder.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if CONFIG_DEBUG -#include -#endif -#include - -#include "treecoder.h" - -static void tree2tok( - struct vp8_token_struct *const p, - vp8_tree t, - int i, - int v, - int L -) -{ - v += v; - ++L; - - do - { - const vp8_tree_index j = t[i++]; - - if (j <= 0) - { - p[-j].value = v; - p[-j].Len = L; - } - else - tree2tok(p, t, j, v, L); - } - while (++v & 1); -} - -void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) -{ - tree2tok(p, t, 0, 0, 0); -} - -void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t, - int offset) -{ - tree2tok(p - offset, t, 0, 0, 0); -} - -static void branch_counts( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ] -) -{ - const int tree_len = n - 1; - int t = 0; - -#if CONFIG_DEBUG - assert(tree_len); -#endif - - do - { - branch_ct[t][0] = branch_ct[t][1] = 0; - } - while (++t < tree_len); - - t = 0; - - do - { - int L = tok[t].Len; - const int enc = tok[t].value; - const unsigned int ct = num_events[t]; - - vp8_tree_index i = 0; - - do - { - const int b = (enc >> --L) & 1; - const int j = i >> 1; -#if CONFIG_DEBUG - assert(j < tree_len && 0 <= L); -#endif - - branch_ct [j] [b] += ct; - i = tree[ i + b]; - } - while (i > 0); - -#if CONFIG_DEBUG - assert(!L); -#endif - } - while (++t < n); - -} - - -void vp8_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - vp8_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - unsigned int Pfac, - int rd -) -{ - const int tree_len = n - 1; - int t = 0; - - branch_counts(n, tok, tree, branch_ct, num_events); - - do - { - const unsigned int *const c = branch_ct[t]; - const unsigned int tot = c[0] + c[1]; - -#if CONFIG_DEBUG - assert(tot < (1 << 24)); /* no overflow below */ -#endif - - if (tot) - { - const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot; - probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */ - } - else - probs[t] = vp8_prob_half; - } - while (++t < tree_len); -} diff --git a/thirdparty/libvpx/vp8/common/treecoder.h b/thirdparty/libvpx/vp8/common/treecoder.h deleted file mode 100644 index d22b7c570c..0000000000 --- a/thirdparty/libvpx/vp8/common/treecoder.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_TREECODER_H_ -#define VP8_COMMON_TREECODER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef unsigned char vp8bc_index_t; /* probability index */ - - -typedef unsigned char vp8_prob; - -#define vp8_prob_half ( (vp8_prob) 128) - -typedef signed char vp8_tree_index; -struct bool_coder_spec; - -typedef struct bool_coder_spec bool_coder_spec; -typedef struct bool_writer bool_writer; -typedef struct bool_reader bool_reader; - -typedef const bool_coder_spec c_bool_coder_spec; -typedef const bool_writer c_bool_writer; -typedef const bool_reader c_bool_reader; - - - -# define vp8_complement( x) (255 - x) - - -/* We build coding trees compactly in arrays. - Each node of the tree is a pair of vp8_tree_indices. - Array index often references a corresponding probability table. - Index <= 0 means done encoding/decoding and value = -Index, - Index > 0 means need another bit, specification at index. - Nonnegative indices are always even; processing begins at node 0. */ - -typedef const vp8_tree_index vp8_tree[], *vp8_tree_p; - - -typedef const struct vp8_token_struct -{ - int value; - int Len; -} vp8_token; - -/* Construct encoding array from tree. */ - -void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree); -void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree, - int offset); - - -/* Convert array of token occurrence counts into a table of probabilities - for the associated binary encoding tree. Also writes count of branches - taken for each node on the tree; this facilitiates decisions as to - probability updates. */ - -void vp8_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - vp8_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - unsigned int Pfactor, - int Round -); - -/* Variant of above using coder spec rather than hardwired 8-bit probs. */ - -void vp8bc_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - vp8_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - c_bool_coder_spec *s -); - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_TREECODER_H_ diff --git a/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h b/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h deleted file mode 100644 index c4aed49897..0000000000 --- a/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. -*/ - -#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_ -#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/*Generated file, included by entropymode.c*/ - - -const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] = -{ - { 0, 1 }, - { 2, 2 }, - { 6, 3 }, - { 28, 5 }, - { 30, 5 }, - { 58, 6 }, - { 59, 6 }, - { 62, 6 }, - { 126, 7 }, - { 127, 7 } -}; - -const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] = -{ - { 0, 1 }, - { 4, 3 }, - { 5, 3 }, - { 6, 3 }, - { 7, 3 } -}; - -const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] = -{ - { 4, 3 }, - { 5, 3 }, - { 6, 3 }, - { 7, 3 }, - { 0, 1 } -}; - -const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] = -{ - { 0, 1 }, - { 2, 2 }, - { 6, 3 }, - { 7, 3 } -}; - -const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] = -{ - { 6, 3 }, - { 7, 3 }, - { 2, 2 }, - { 0, 1 } -}; - -const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] = -{ - { 2, 2 }, - { 6, 3 }, - { 0, 1 }, - { 14, 4 }, - { 15, 4 } -}; - -const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] = -{ - { 0, 1 }, - { 2, 2 }, - { 6, 3 }, - { 7, 3 } -}; - -const struct vp8_token_struct vp8_small_mvencodings[8] = -{ - { 0, 3 }, - { 1, 3 }, - { 2, 3 }, - { 3, 3 }, - { 4, 3 }, - { 5, 3 }, - { 6, 3 }, - { 7, 3 } -}; - -const vp8_prob vp8_ymode_prob[VP8_YMODES-1] = -{ - 112, 86, 140, 37 -}; - -const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1] = -{ - 145, 156, 163, 128 -}; - -const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES-1] = -{ - 162, 101, 204 -}; - -const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1] = -{ - 142, 114, 183 -}; - -const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES-1] = -{ - 120, 90, 79, 133, 87, 85, 80, 111, 151 -}; - - - -const vp8_prob vp8_kf_bmode_prob -[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1] = -{ - { - { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, - { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, - { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, - { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, - { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, - { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, - { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, - { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, - { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, - { 81, 40, 11, 96, 182, 84, 29, 16, 36 } - }, - { - { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, - { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, - { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, - { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, - { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, - { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, - { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, - { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, - { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, - { 66, 45, 25, 102, 197, 189, 23, 18, 22 } - }, - { - { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, - { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, - { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, - { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, - { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, - { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, - { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, - { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, - { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, - { 62, 18, 78, 95, 85, 57, 50, 48, 51 } - }, - { - { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, - { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, - { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, - { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, - { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, - { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, - { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, - { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, - { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, - { 51, 50, 17, 168, 209, 192, 23, 25, 82 } - }, - { - { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, - { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, - { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, - { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, - { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, - { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, - { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, - { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, - { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, - { 117, 20, 15, 36, 163, 128, 68, 1, 26 } - }, - { - { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, - { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, - { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, - { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, - { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, - { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, - { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, - { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, - { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, - { 87, 37, 9, 115, 59, 77, 64, 21, 47 } - }, - { - { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, - { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, - { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, - { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, - { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, - { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, - { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, - { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, - { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, - { 55, 38, 70, 124, 73, 102, 1, 34, 98 } - }, - { - { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, - { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, - { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, - { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, - { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, - { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, - { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, - { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, - { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, - { 58, 15, 20, 82, 135, 57, 26, 121, 40 } - }, - { - { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, - { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, - { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, - { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, - { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, - { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, - { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, - { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, - { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, - { 35, 27, 10, 146, 174, 171, 12, 26, 128 } - }, - { - { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, - { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, - { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, - { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, - { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, - { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, - { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, - { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, - { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, - { 112, 19, 12, 61, 195, 128, 48, 4, 24 } - } -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_VP8_ENTROPYMODEDATA_H_ diff --git a/thirdparty/libvpx/vp8/common/vp8_loopfilter.c b/thirdparty/libvpx/vp8/common/vp8_loopfilter.c deleted file mode 100644 index 756ad488f9..0000000000 --- a/thirdparty/libvpx/vp8/common/vp8_loopfilter.c +++ /dev/null @@ -1,661 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "loopfilter.h" -#include "onyxc_int.h" -#include "vpx_mem/vpx_mem.h" - - -static void lf_init_lut(loop_filter_info_n *lfi) -{ - int filt_lvl; - - for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) - { - if (filt_lvl >= 40) - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3; - } - else if (filt_lvl >= 20) - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2; - } - else if (filt_lvl >= 15) - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1; - } - else - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0; - } - } - - lfi->mode_lf_lut[DC_PRED] = 1; - lfi->mode_lf_lut[V_PRED] = 1; - lfi->mode_lf_lut[H_PRED] = 1; - lfi->mode_lf_lut[TM_PRED] = 1; - lfi->mode_lf_lut[B_PRED] = 0; - - lfi->mode_lf_lut[ZEROMV] = 1; - lfi->mode_lf_lut[NEARESTMV] = 2; - lfi->mode_lf_lut[NEARMV] = 2; - lfi->mode_lf_lut[NEWMV] = 2; - lfi->mode_lf_lut[SPLITMV] = 3; - -} - -void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, - int sharpness_lvl) -{ - int i; - - /* For each possible value for the loop filter fill out limits */ - for (i = 0; i <= MAX_LOOP_FILTER; i++) - { - int filt_lvl = i; - int block_inside_limit = 0; - - /* Set loop filter paramaeters that control sharpness. */ - block_inside_limit = filt_lvl >> (sharpness_lvl > 0); - block_inside_limit = block_inside_limit >> (sharpness_lvl > 4); - - if (sharpness_lvl > 0) - { - if (block_inside_limit > (9 - sharpness_lvl)) - block_inside_limit = (9 - sharpness_lvl); - } - - if (block_inside_limit < 1) - block_inside_limit = 1; - - memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH); - memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH); - memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit), - SIMD_WIDTH); - } -} - -void vp8_loop_filter_init(VP8_COMMON *cm) -{ - loop_filter_info_n *lfi = &cm->lf_info; - int i; - - /* init limits for given sharpness*/ - vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); - cm->last_sharpness_level = cm->sharpness_level; - - /* init LUT for lvl and hev thr picking */ - lf_init_lut(lfi); - - /* init hev threshold const vectors */ - for(i = 0; i < 4 ; i++) - { - memset(lfi->hev_thr[i], i, SIMD_WIDTH); - } -} - -void vp8_loop_filter_frame_init(VP8_COMMON *cm, - MACROBLOCKD *mbd, - int default_filt_lvl) -{ - int seg, /* segment number */ - ref, /* index in ref_lf_deltas */ - mode; /* index in mode_lf_deltas */ - - loop_filter_info_n *lfi = &cm->lf_info; - - /* update limits if sharpness has changed */ - if(cm->last_sharpness_level != cm->sharpness_level) - { - vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); - cm->last_sharpness_level = cm->sharpness_level; - } - - for(seg = 0; seg < MAX_MB_SEGMENTS; seg++) - { - int lvl_seg = default_filt_lvl; - int lvl_ref, lvl_mode; - - /* Note the baseline filter values for each segment */ - if (mbd->segmentation_enabled) - { - /* Abs value */ - if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) - { - lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; - } - else /* Delta Value */ - { - lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; - } - lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0; - } - - if (!mbd->mode_ref_lf_delta_enabled) - { - /* we could get rid of this if we assume that deltas are set to - * zero when not in use; encoder always uses deltas - */ - memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 ); - continue; - } - - /* INTRA_FRAME */ - ref = INTRA_FRAME; - - /* Apply delta for reference frame */ - lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; - - /* Apply delta for Intra modes */ - mode = 0; /* B_PRED */ - /* Only the split mode BPRED has a further special case */ - lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; - /* clamp */ - lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; - - lfi->lvl[seg][ref][mode] = lvl_mode; - - mode = 1; /* all the rest of Intra modes */ - /* clamp */ - lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; - lfi->lvl[seg][ref][mode] = lvl_mode; - - /* LAST, GOLDEN, ALT */ - for(ref = 1; ref < MAX_REF_FRAMES; ref++) - { - /* Apply delta for reference frame */ - lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; - - /* Apply delta for Inter modes */ - for (mode = 1; mode < 4; mode++) - { - lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; - /* clamp */ - lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; - - lfi->lvl[seg][ref][mode] = lvl_mode; - } - } - } -} - - -void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr) -{ - int mb_col; - int filter_level; - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - FRAME_TYPE frame_type = cm->frame_type; - - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - -} - -void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr) -{ - int mb_col; - int filter_level; - loop_filter_info_n *lfi_n = &cm->lf_info; - (void)post_uvstride; - - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post_ystride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post_ystride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (y_ptr, post_ystride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post_ystride, lfi_n->blim[filter_level]); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - -} -void vp8_loop_filter_frame(VP8_COMMON *cm, - MACROBLOCKD *mbd, - int frame_type) -{ - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - - int mb_row; - int mb_col; - int mb_rows = cm->mb_rows; - int mb_cols = cm->mb_cols; - - int filter_level; - - unsigned char *y_ptr, *u_ptr, *v_ptr; - - /* Point at base of Mb MODE_INFO list */ - const MODE_INFO *mode_info_context = cm->mi; - int post_y_stride = post->y_stride; - int post_uv_stride = post->uv_stride; - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init(cm, mbd, cm->filter_level); - - /* Set up the buffer pointers */ - y_ptr = post->y_buffer; - u_ptr = post->u_buffer; - v_ptr = post->v_buffer; - - /* vp8_filter each macro block */ - if (cm->filter_type == NORMAL_LOOPFILTER) - { - for (mb_row = 0; mb_row < mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - y_ptr += post_y_stride * 16 - post->y_width; - u_ptr += post_uv_stride * 8 - post->uv_width; - v_ptr += post_uv_stride * 8 - post->uv_width; - - mode_info_context++; /* Skip border mb */ - - } - } - else /* SIMPLE_LOOPFILTER */ - { - for (mb_row = 0; mb_row < mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - if (filter_level) - { - const unsigned char * mblim = lfi_n->mblim[filter_level]; - const unsigned char * blim = lfi_n->blim[filter_level]; - - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post_y_stride, mblim); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post_y_stride, blim); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (y_ptr, post_y_stride, mblim); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post_y_stride, blim); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - y_ptr += post_y_stride * 16 - post->y_width; - u_ptr += post_uv_stride * 8 - post->uv_width; - v_ptr += post_uv_stride * 8 - post->uv_width; - - mode_info_context++; /* Skip border mb */ - - } - } -} - -void vp8_loop_filter_frame_yonly -( - VP8_COMMON *cm, - MACROBLOCKD *mbd, - int default_filt_lvl -) -{ - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - - unsigned char *y_ptr; - int mb_row; - int mb_col; - - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - - int filter_level; - FRAME_TYPE frame_type = cm->frame_type; - - /* Point at base of Mb MODE_INFO list */ - const MODE_INFO *mode_info_context = cm->mi; - -#if 0 - if(default_filt_lvl == 0) /* no filter applied */ - return; -#endif - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl); - - /* Set up the buffer pointers */ - y_ptr = post->y_buffer; - - /* vp8_filter each macro block */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - if (cm->filter_type == NORMAL_LOOPFILTER) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - } - else - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - } - } - - y_ptr += 16; - mode_info_context ++; /* step to next MB */ - - } - - y_ptr += post->y_stride * 16 - post->y_width; - mode_info_context ++; /* Skip border mb */ - } - -} - -void vp8_loop_filter_partial_frame -( - VP8_COMMON *cm, - MACROBLOCKD *mbd, - int default_filt_lvl -) -{ - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - - unsigned char *y_ptr; - int mb_row; - int mb_col; - int mb_cols = post->y_width >> 4; - int mb_rows = post->y_height >> 4; - - int linestocopy; - - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - - int filter_level; - FRAME_TYPE frame_type = cm->frame_type; - - const MODE_INFO *mode_info_context; - -#if 0 - if(default_filt_lvl == 0) /* no filter applied */ - return; -#endif - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl); - - /* number of MB rows to use in partial filtering */ - linestocopy = mb_rows / PARTIAL_FRAME_FRACTION; - linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */ - - /* Set up the buffer pointers; partial image starts at ~middle of frame */ - y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride; - mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); - - /* vp8_filter each macro block */ - for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++) - { - for (mb_col = 0; mb_col < mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = - lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - if (cm->filter_type == NORMAL_LOOPFILTER) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - vp8_loop_filter_mbh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - } - else - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - - vp8_loop_filter_simple_mbh - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - } - } - - y_ptr += 16; - mode_info_context += 1; /* step to next MB */ - } - - y_ptr += post->y_stride * 16 - post->y_width; - mode_info_context += 1; /* Skip border mb */ - } -} diff --git a/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm b/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm deleted file mode 100644 index 86fae26956..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm +++ /dev/null @@ -1,93 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_copy32xn_sse2( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp8_copy32xn_sse2) PRIVATE -sym(vp8_copy32xn_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;dst_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;dst_stride - movsxd rcx, dword ptr arg(4) ;height - -.block_copy_sse2_loopx4: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - movdqu xmm2, XMMWORD PTR [rsi + rax] - movdqu xmm3, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqu xmm4, XMMWORD PTR [rsi] - movdqu xmm5, XMMWORD PTR [rsi + 16] - movdqu xmm6, XMMWORD PTR [rsi + rax] - movdqu xmm7, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - movdqa XMMWORD PTR [rdi + rdx], xmm2 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 - - lea rdi, [rdi+rdx*2] - - movdqa XMMWORD PTR [rdi], xmm4 - movdqa XMMWORD PTR [rdi + 16], xmm5 - movdqa XMMWORD PTR [rdi + rdx], xmm6 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 - - lea rdi, [rdi+rdx*2] - - sub rcx, 4 - cmp rcx, 4 - jge .block_copy_sse2_loopx4 - - cmp rcx, 0 - je .copy_is_done - -.block_copy_sse2_loop: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - lea rsi, [rsi+rax] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - lea rdi, [rdi+rdx] - - sub rcx, 1 - jne .block_copy_sse2_loop - -.copy_is_done: - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm b/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm deleted file mode 100644 index d789a40ccf..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm +++ /dev/null @@ -1,146 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE_X3 0 -%if ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define ref_ptr rdi - %define ref_stride rdx - %define end_ptr rcx - %define ret_var rbx - %define result_ptr arg(4) - %define max_sad arg(4) - %define height dword ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - mov rsi, arg(0) ; src_ptr - mov rdi, arg(2) ; ref_ptr - - movsxd rax, dword ptr arg(1) ; src_stride - movsxd rdx, dword ptr arg(3) ; ref_stride -%else - %if LIBVPX_YASM_WIN64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define ref_ptr r8 - %define ref_stride r9 - %define end_ptr r10 - %define ret_var r11 - %define result_ptr [rsp+xmm_stack_space+8+4*8] - %define max_sad [rsp+xmm_stack_space+8+4*8] - %define height dword ptr [rsp+xmm_stack_space+8+4*8] - %else - %define src_ptr rdi - %define src_stride rsi - %define ref_ptr rdx - %define ref_stride rcx - %define end_ptr r9 - %define ret_var r10 - %define result_ptr r8 - %define max_sad r8 - %define height r8 - %endif -%endif - -%endmacro - -%macro STACK_FRAME_DESTROY_X3 0 - %define src_ptr - %define src_stride - %define ref_ptr - %define ref_stride - %define end_ptr - %define ret_var - %define result_ptr - %define max_sad - %define height - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %if LIBVPX_YASM_WIN64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - - -;void vp8_copy32xn_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp8_copy32xn_sse3) PRIVATE -sym(vp8_copy32xn_sse3): - - STACK_FRAME_CREATE_X3 - -.block_copy_sse3_loopx4: - lea end_ptr, [src_ptr+src_stride*2] - - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] - movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] - movdqu xmm4, XMMWORD PTR [end_ptr] - movdqu xmm5, XMMWORD PTR [end_ptr + 16] - movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] - movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] - - lea src_ptr, [src_ptr+src_stride*4] - - lea end_ptr, [ref_ptr+ref_stride*2] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 - movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 - movdqa XMMWORD PTR [end_ptr], xmm4 - movdqa XMMWORD PTR [end_ptr + 16], xmm5 - movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 - movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 - - lea ref_ptr, [ref_ptr+ref_stride*4] - - sub height, 4 - cmp height, 4 - jge .block_copy_sse3_loopx4 - - ;Check to see if there is more rows need to be copied. - cmp height, 0 - je .copy_is_done - -.block_copy_sse3_loop: - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - lea src_ptr, [src_ptr+src_stride] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - lea ref_ptr, [ref_ptr+ref_stride] - - sub height, 1 - jne .block_copy_sse3_loop - -.copy_is_done: - STACK_FRAME_DESTROY_X3 diff --git a/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm b/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm deleted file mode 100644 index 4e551f00aa..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm +++ /dev/null @@ -1,258 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) -global sym(vp8_dequantize_b_impl_mmx) PRIVATE -sym(vp8_dequantize_b_impl_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;sq - mov rdi, arg(1) ;dq - mov rax, arg(2) ;q - - movq mm1, [rsi] - pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers. - movq [rdi], mm1 - - movq mm1, [rsi+8] - pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers. - movq [rdi+8], mm1 - - movq mm1, [rsi+16] - pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers. - movq [rdi+16], mm1 - - movq mm1, [rsi+24] - pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers. - movq [rdi+24], mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void dequant_idct_add_mmx( -;short *input, 0 -;short *dq, 1 -;unsigned char *dest, 2 -;int stride) 3 -global sym(vp8_dequant_idct_add_mmx) PRIVATE -sym(vp8_dequant_idct_add_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - GET_GOT rbx - push rdi - ; end prolog - - mov rax, arg(0) ;input - mov rdx, arg(1) ;dq - - - movq mm0, [rax ] - pmullw mm0, [rdx] - - movq mm1, [rax +8] - pmullw mm1, [rdx +8] - - movq mm2, [rax+16] - pmullw mm2, [rdx+16] - - movq mm3, [rax+24] - pmullw mm3, [rdx+24] - - mov rdx, arg(2) ;dest - - pxor mm7, mm7 - - - movq [rax], mm7 - movq [rax+8], mm7 - - movq [rax+16],mm7 - movq [rax+24],mm7 - - - movsxd rdi, dword ptr arg(3) ;stride - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - movq mm3, mm5 ; 33 23 13 03 - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - paddw mm0, [GLOBAL(fours)] - - paddw mm2, [GLOBAL(fours)] - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - psraw mm2, 3 - - psraw mm0, 3 - psraw mm4, 3 - - psraw mm6, 3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - pxor mm7, mm7 - - movd mm4, [rdx] - punpcklbw mm4, mm7 - paddsw mm0, mm4 - packuswb mm0, mm7 - movd [rdx], mm0 - - movd mm4, [rdx+rdi] - punpcklbw mm4, mm7 - paddsw mm1, mm4 - packuswb mm1, mm7 - movd [rdx+rdi], mm1 - - movd mm4, [rdx+2*rdi] - punpcklbw mm4, mm7 - paddsw mm2, mm4 - packuswb mm2, mm7 - movd [rdx+rdi*2], mm2 - - add rdx, rdi - - movd mm4, [rdx+2*rdi] - punpcklbw mm4, mm7 - paddsw mm5, mm4 - packuswb mm5, mm7 - movd [rdx+rdi*2], mm5 - - ; begin epilog - pop rdi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -x_s1sqr2: - times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 4 dw 0x4E7B -align 16 -fours: - times 4 dw 0x0004 diff --git a/thirdparty/libvpx/vp8/common/x86/filter_x86.c b/thirdparty/libvpx/vp8/common/x86/filter_x86.c deleted file mode 100644 index 7f496ed7db..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/filter_x86.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp8/common/x86/filter_x86.h" - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = -{ - { 128, 128, 128, 128, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 112, 112, 112, 112 } -}; - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = -{ - { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 } -}; diff --git a/thirdparty/libvpx/vp8/common/x86/filter_x86.h b/thirdparty/libvpx/vp8/common/x86/filter_x86.h deleted file mode 100644 index d282841bee..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/filter_x86.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_X86_FILTER_X86_H_ -#define VP8_COMMON_X86_FILTER_X86_H_ - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with - * duplicated values */ - -/* duplicated 4x */ -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]); - -/* duplicated 8x */ -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_X86_FILTER_X86_H_ diff --git a/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c b/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c deleted file mode 100644 index f2532b34da..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vp8/common/blockd.h" -#include "vpx_mem/vpx_mem.h" - -extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); - -void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) -{ - short *sq = (short *) d->qcoeff; - short *dq = (short *) d->dqcoeff; - - vp8_dequantize_b_impl_mmx(sq, dq, DQC); -} - -void vp8_dequant_idct_add_y_block_mmx - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dst, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride, - dst+4, stride); - memset(q + 16, 0, 2 * sizeof(q[0])); - } - - if (eobs[2] > 1) - vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride); - else if (eobs[2] == 1) - { - vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride, - dst+8, stride); - memset(q + 32, 0, 2 * sizeof(q[0])); - } - - if (eobs[3] > 1) - vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride); - else if (eobs[3] == 1) - { - vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride, - dst+12, stride); - memset(q + 48, 0, 2 * sizeof(q[0])); - } - - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_mmx - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dstu, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride, - dstu+4, stride); - memset(q + 16, 0, 2 * sizeof(q[0])); - } - - q += 32; - dstu += 4*stride; - eobs += 2; - } - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dstv, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride, - dstv+4, stride); - memset(q + 16, 0, 2 * sizeof(q[0])); - } - - q += 32; - dstv += 4*stride; - eobs += 2; - } -} diff --git a/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c b/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c deleted file mode 100644 index ae96ec858c..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" - -void vp8_idct_dequant_0_2x_sse2 - (short *q, short *dq , - unsigned char *dst, int dst_stride); -void vp8_idct_dequant_full_2x_sse2 - (short *q, short *dq , - unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_y_block_sse2 - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (((short *)(eobs))[0]) - { - if (((short *)(eobs))[0] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride); - } - if (((short *)(eobs))[1]) - { - if (((short *)(eobs))[1] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride); - else - vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride); - } - q += 64; - dst += stride*4; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_sse2 - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - if (((short *)(eobs))[0]) - { - if (((short *)(eobs))[0] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); - } - q += 32; - dstu += stride*4; - - if (((short *)(eobs))[1]) - { - if (((short *)(eobs))[1] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); - } - q += 32; - - if (((short *)(eobs))[2]) - { - if (((short *)(eobs))[2] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); - } - q += 32; - dstv += stride*4; - - if (((short *)(eobs))[3]) - { - if (((short *)(eobs))[3] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); - } -} diff --git a/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm b/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm deleted file mode 100644 index 96fa2c60d0..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm +++ /dev/null @@ -1,295 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -; /**************************************************************************** -; * Notes: -; * -; * This implementation makes use of 16 bit fixed point version of two multiply -; * constants: -; * 1. sqrt(2) * cos (pi/8) -; * 2. sqrt(2) * sin (pi/8) -; * Because the first constant is bigger than 1, to maintain the same 16 bit -; * fixed point precision as the second one, we use a trick of -; * x * a = x + x*(a-1) -; * so -; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). -; * -; * For the second constant, because of the 16bit version is 35468, which -; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative -; * number. -; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x -; * -; **************************************************************************/ - - -;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, -;int pitch, unsigned char *dest,int stride) -global sym(vp8_short_idct4x4llm_mmx) PRIVATE -sym(vp8_short_idct4x4llm_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(0) ;input - mov rsi, arg(1) ;pred - - movq mm0, [rax ] - movq mm1, [rax+ 8] - movq mm2, [rax+16] - movq mm3, [rax+24] - -%if 0 - pxor mm7, mm7 - movq [rax], mm7 - movq [rax+8], mm7 - movq [rax+16],mm7 - movq [rax+24],mm7 -%endif - movsxd rax, dword ptr arg(2) ;pitch - mov rdx, arg(3) ;dest - movsxd rdi, dword ptr arg(4) ;stride - - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - movq mm3, mm5 ; 33 23 13 03 - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - paddw mm0, [GLOBAL(fours)] - - paddw mm2, [GLOBAL(fours)] - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - psraw mm2, 3 - - psraw mm0, 3 - psraw mm4, 3 - - psraw mm6, 3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - pxor mm7, mm7 - - movd mm4, [rsi] - punpcklbw mm4, mm7 - paddsw mm0, mm4 - packuswb mm0, mm7 - movd [rdx], mm0 - - movd mm4, [rsi+rax] - punpcklbw mm4, mm7 - paddsw mm1, mm4 - packuswb mm1, mm7 - movd [rdx+rdi], mm1 - - movd mm4, [rsi+2*rax] - punpcklbw mm4, mm7 - paddsw mm2, mm4 - packuswb mm2, mm7 - movd [rdx+rdi*2], mm2 - - add rdx, rdi - add rsi, rax - - movd mm4, [rsi+2*rax] - punpcklbw mm4, mm7 - paddsw mm5, mm4 - packuswb mm5, mm7 - movd [rdx+rdi*2], mm5 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_dc_only_idct_add_mmx( -;short input_dc, -;unsigned char *pred_ptr, -;int pred_stride, -;unsigned char *dst_ptr, -;int stride) -global sym(vp8_dc_only_idct_add_mmx) PRIVATE -sym(vp8_dc_only_idct_add_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - ; end prolog - - movd mm5, arg(0) ;input_dc - mov rax, arg(1) ;pred_ptr - movsxd rdx, dword ptr arg(2) ;pred_stride - - pxor mm0, mm0 - - paddw mm5, [GLOBAL(fours)] - lea rcx, [rdx + rdx*2] - - psraw mm5, 3 - - punpcklwd mm5, mm5 - - punpckldq mm5, mm5 - - movd mm1, [rax] - movd mm2, [rax+rdx] - movd mm3, [rax+2*rdx] - movd mm4, [rax+rcx] - - mov rax, arg(3) ;d -- destination - movsxd rdx, dword ptr arg(4) ;dst_stride - - punpcklbw mm1, mm0 - paddsw mm1, mm5 - packuswb mm1, mm0 ; pack and unpack to saturate - lea rcx, [rdx + rdx*2] - - punpcklbw mm2, mm0 - paddsw mm2, mm5 - packuswb mm2, mm0 ; pack and unpack to saturate - - punpcklbw mm3, mm0 - paddsw mm3, mm5 - packuswb mm3, mm0 ; pack and unpack to saturate - - punpcklbw mm4, mm0 - paddsw mm4, mm5 - packuswb mm4, mm0 ; pack and unpack to saturate - - movd [rax], mm1 - movd [rax+rdx], mm2 - movd [rax+2*rdx], mm3 - movd [rax+rcx], mm4 - - ; begin epilog - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -x_s1sqr2: - times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 4 dw 0x4E7B -align 16 -fours: - times 4 dw 0x0004 diff --git a/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm b/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm deleted file mode 100644 index bf8e2c4021..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm +++ /dev/null @@ -1,708 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_idct_dequant_0_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; ) - -global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE -sym(vp8_idct_dequant_0_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - GET_GOT rbx - ; end prolog - - mov rdx, arg(1) ; dequant - mov rax, arg(0) ; qcoeff - - movd xmm4, [rax] - movd xmm5, [rdx] - - pinsrw xmm4, [rax+32], 4 - pinsrw xmm5, [rdx], 4 - - pmullw xmm4, xmm5 - - ; Zero out xmm5, for use unpacking - pxor xmm5, xmm5 - - ; clear coeffs - movd [rax], xmm5 - movd [rax+32], xmm5 -;pshufb - mov rax, arg(2) ; dst - movsxd rdx, dword ptr arg(3) ; dst_stride - - pshuflw xmm4, xmm4, 00000000b - pshufhw xmm4, xmm4, 00000000b - - lea rcx, [rdx + rdx*2] - paddw xmm4, [GLOBAL(fours)] - - psraw xmm4, 3 - - movq xmm0, [rax] - movq xmm1, [rax+rdx] - movq xmm2, [rax+2*rdx] - movq xmm3, [rax+rcx] - - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - - - ; Add to predict buffer - paddw xmm0, xmm4 - paddw xmm1, xmm4 - paddw xmm2, xmm4 - paddw xmm3, xmm4 - - ; pack up before storing - packuswb xmm0, xmm5 - packuswb xmm1, xmm5 - packuswb xmm2, xmm5 - packuswb xmm3, xmm5 - - ; store blocks back out - movq [rax], xmm0 - movq [rax + rdx], xmm1 - - lea rax, [rax + 2*rdx] - - movq [rax], xmm2 - movq [rax + rdx], xmm3 - - ; begin epilog - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_idct_dequant_full_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; ) -global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE -sym(vp8_idct_dequant_full_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rdx, arg(1) ; dequant - mov rdi, arg(2) ; dst - - - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - - - ; note the transpose of xmm1 and xmm2, necessary for shuffle - ; to spit out sensicle data - movdqa xmm0, [rax] - movdqa xmm2, [rax+16] - movdqa xmm1, [rax+32] - movdqa xmm3, [rax+48] - - ; Clear out coeffs - movdqa [rax], xmm7 - movdqa [rax+16], xmm7 - movdqa [rax+32], xmm7 - movdqa [rax+48], xmm7 - - ; dequantize qcoeff buffer - pmullw xmm0, [rdx] - pmullw xmm2, [rdx+16] - pmullw xmm1, [rdx] - pmullw xmm3, [rdx+16] - movsxd rdx, dword ptr arg(3) ; dst_stride - - ; repack so block 0 row x and block 1 row x are together - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - punpckhdq xmm4, xmm1 - - pshufd xmm0, xmm0, 11011000b - pshufd xmm1, xmm4, 11011000b - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - pshufd xmm2, xmm2, 11011000b - pshufd xmm3, xmm4, 11011000b - - ; first pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 ; - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - lea rcx, [rdx + rdx*2] ;dst_stride * 3 - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - - ; transpose for the second pass - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - ; second pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - paddw xmm0, [GLOBAL(fours)] - - paddw xmm2, [GLOBAL(fours)] - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - psraw xmm2, 3 - - psraw xmm0, 3 - psraw xmm4, 3 - - psraw xmm6, 3 - - ; transpose to save - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - pxor xmm7, xmm7 - - ; Load up predict blocks - movq xmm4, [rdi] - movq xmm5, [rdi+rdx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - - movq xmm4, [rdi+2*rdx] - movq xmm5, [rdi+rcx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm2, xmm4 - paddw xmm3, xmm5 - -.finish: - - ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - movq [rdi + rdx*2], xmm2 - movq [rdi + rcx], xmm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_idct_dequant_dc_0_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; short *dc - 4 -; ) -global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE -sym(vp8_idct_dequant_dc_0_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - - mov rdi, arg(2) ; dst - mov rdx, arg(4) ; dc - - ; Zero out xmm5, for use unpacking - pxor xmm5, xmm5 - - ; load up 2 dc words here == 2*16 = doubleword - movd xmm4, [rdx] - - movsxd rdx, dword ptr arg(3) ; dst_stride - lea rcx, [rdx + rdx*2] - ; Load up predict blocks - movq xmm0, [rdi] - movq xmm1, [rdi+rdx*1] - movq xmm2, [rdi+rdx*2] - movq xmm3, [rdi+rcx] - - ; Duplicate and expand dc across - punpcklwd xmm4, xmm4 - punpckldq xmm4, xmm4 - - ; Rounding to dequant and downshift - paddw xmm4, [GLOBAL(fours)] - psraw xmm4, 3 - - ; Predict buffer needs to be expanded from bytes to words - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - - ; Add to predict buffer - paddw xmm0, xmm4 - paddw xmm1, xmm4 - paddw xmm2, xmm4 - paddw xmm3, xmm4 - - ; pack up before storing - packuswb xmm0, xmm5 - packuswb xmm1, xmm5 - packuswb xmm2, xmm5 - packuswb xmm3, xmm5 - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - movq [rdi + rdx*2], xmm2 - movq [rdi + rcx], xmm3 - - ; begin epilog - pop rdi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret -;void vp8_idct_dequant_dc_full_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; short *dc - 4 -; ) -global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE -sym(vp8_idct_dequant_dc_full_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rdx, arg(1) ; dequant - - mov rdi, arg(2) ; dst - - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - - - ; note the transpose of xmm1 and xmm2, necessary for shuffle - ; to spit out sensicle data - movdqa xmm0, [rax] - movdqa xmm2, [rax+16] - movdqa xmm1, [rax+32] - movdqa xmm3, [rax+48] - - ; Clear out coeffs - movdqa [rax], xmm7 - movdqa [rax+16], xmm7 - movdqa [rax+32], xmm7 - movdqa [rax+48], xmm7 - - ; dequantize qcoeff buffer - pmullw xmm0, [rdx] - pmullw xmm2, [rdx+16] - pmullw xmm1, [rdx] - pmullw xmm3, [rdx+16] - - ; DC component - mov rdx, arg(4) - - ; repack so block 0 row x and block 1 row x are together - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - punpckhdq xmm4, xmm1 - - pshufd xmm0, xmm0, 11011000b - pshufd xmm1, xmm4, 11011000b - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - pshufd xmm2, xmm2, 11011000b - pshufd xmm3, xmm4, 11011000b - - ; insert DC component - pinsrw xmm0, [rdx], 0 - pinsrw xmm0, [rdx+2], 4 - - ; first pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 ; - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - - ; transpose for the second pass - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - ; second pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - paddw xmm0, [GLOBAL(fours)] - - paddw xmm2, [GLOBAL(fours)] - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - psraw xmm2, 3 - - psraw xmm0, 3 - psraw xmm4, 3 - - psraw xmm6, 3 - - ; transpose to save - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - pxor xmm7, xmm7 - - ; Load up predict blocks - movsxd rdx, dword ptr arg(3) ; dst_stride - movq xmm4, [rdi] - movq xmm5, [rdi+rdx] - lea rcx, [rdx + rdx*2] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - - movq xmm4, [rdi+rdx*2] - movq xmm5, [rdi+rcx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm2, xmm4 - paddw xmm3, xmm5 - -.finish: - - ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 - - ; Load destination stride before writing out, - ; doesn't need to persist - movsxd rdx, dword ptr arg(3) ; dst_stride - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm2 - movq [rdi + rdx], xmm3 - - - ; begin epilog - pop rdi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -fours: - times 8 dw 0x0004 -align 16 -x_s1sqr2: - times 8 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 8 dw 0x4E7B diff --git a/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm b/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm deleted file mode 100644 index 158c3b7458..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm +++ /dev/null @@ -1,140 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) -global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE -sym(vp8_short_inv_walsh4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - ; end prolog - - mov rdx, arg(0) - mov rax, 30003h - - movq mm0, [rdx + 0] ;ip[0] - movq mm1, [rdx + 8] ;ip[4] - movq mm7, rax - - movq mm2, [rdx + 16] ;ip[8] - movq mm3, [rdx + 24] ;ip[12] - punpcklwd mm7, mm7 ;0003000300030003h - mov rdx, arg(1) - - movq mm4, mm0 - movq mm5, mm1 - - paddw mm4, mm3 ;ip[0] + ip[12] aka al - paddw mm5, mm2 ;ip[4] + ip[8] aka bl - - movq mm6, mm4 ;temp al - paddw mm4, mm5 ;al + bl - psubw mm6, mm5 ;al - bl - - psubw mm0, mm3 ;ip[0] - ip[12] aka d1 - psubw mm1, mm2 ;ip[4] - ip[8] aka c1 - - movq mm5, mm0 ;temp dl - paddw mm0, mm1 ;dl + cl - psubw mm5, mm1 ;dl - cl - - ; 03 02 01 00 - ; 13 12 11 10 - ; 23 22 21 20 - ; 33 32 31 30 - - movq mm3, mm4 ; 03 02 01 00 - punpcklwd mm4, mm0 ; 11 01 10 00 - punpckhwd mm3, mm0 ; 13 03 12 02 - - movq mm1, mm6 ; 23 22 21 20 - punpcklwd mm6, mm5 ; 31 21 30 20 - punpckhwd mm1, mm5 ; 33 23 32 22 - - movq mm0, mm4 ; 11 01 10 00 - movq mm2, mm3 ; 13 03 12 02 - - punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0] - punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4] - - punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8] - punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12] -;~~~~~~~~~~~~~~~~~~~~~ - movq mm1, mm0 - movq mm5, mm4 - paddw mm1, mm3 ;ip[0] + ip[12] aka al - paddw mm5, mm2 ;ip[4] + ip[8] aka bl - - movq mm6, mm1 ;temp al - paddw mm1, mm5 ;al + bl - psubw mm6, mm5 ;al - bl - paddw mm1, mm7 - paddw mm6, mm7 - psraw mm1, 3 - psraw mm6, 3 - - psubw mm0, mm3 ;ip[0] - ip[12] aka d1 - psubw mm4, mm2 ;ip[4] - ip[8] aka c1 - - movq mm5, mm0 ;temp dl - paddw mm0, mm4 ;dl + cl - psubw mm5, mm4 ;dl - cl - paddw mm0, mm7 - paddw mm5, mm7 - psraw mm0, 3 - psraw mm5, 3 -;~~~~~~~~~~~~~~~~~~~~~ - - movd eax, mm1 - movd ecx, mm0 - psrlq mm0, 32 - psrlq mm1, 32 - mov word ptr[rdx+32*0], ax - mov word ptr[rdx+32*1], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*4], ax - mov word ptr[rdx+32*5], cx - movd eax, mm1 - movd ecx, mm0 - mov word ptr[rdx+32*8], ax - mov word ptr[rdx+32*9], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*12], ax - mov word ptr[rdx+32*13], cx - - movd eax, mm6 - movd ecx, mm5 - psrlq mm5, 32 - psrlq mm6, 32 - mov word ptr[rdx+32*2], ax - mov word ptr[rdx+32*3], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*6], ax - mov word ptr[rdx+32*7], cx - movd eax, mm6 - movd ecx, mm5 - mov word ptr[rdx+32*10], ax - mov word ptr[rdx+32*11], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*14], ax - mov word ptr[rdx+32*15], cx - - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - diff --git a/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm b/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm deleted file mode 100644 index 06e86a80b6..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm +++ /dev/null @@ -1,121 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) -global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE -sym(vp8_short_inv_walsh4x4_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - ; end prolog - - mov rcx, arg(0) - mov rdx, arg(1) - mov rax, 30003h - - movdqa xmm0, [rcx + 0] ;ip[4] ip[0] - movdqa xmm1, [rcx + 16] ;ip[12] ip[8] - - - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm0 ;ip[4] ip[0] - - paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 - - movdqa xmm4, xmm0 - punpcklqdq xmm0, xmm3 ;d1 a1 - punpckhqdq xmm4, xmm3 ;c1 b1 - - movdqa xmm1, xmm4 ;c1 b1 - paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] - - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; 13 12 11 10 03 02 01 00 - ; - ; 33 32 31 30 23 22 21 20 - ; - movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 - punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 - punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 - movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 - punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - movd xmm0, eax - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm4 ;ip[4] ip[0] - - pshufd xmm0, xmm0, 0 ;03 03 03 03 03 03 03 03 - - paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 - - movdqa xmm5, xmm4 - punpcklqdq xmm4, xmm3 ;d1 a1 - punpckhqdq xmm5, xmm3 ;c1 b1 - - movdqa xmm1, xmm5 ;c1 b1 - paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] - - paddw xmm5, xmm0 - paddw xmm4, xmm0 - psraw xmm5, 3 - psraw xmm4, 3 - - movd eax, xmm5 - movd ecx, xmm4 - psrldq xmm5, 4 - psrldq xmm4, 4 - mov word ptr[rdx+32*0], ax - mov word ptr[rdx+32*2], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*4], ax - mov word ptr[rdx+32*6], cx - movd eax, xmm5 - movd ecx, xmm4 - psrldq xmm5, 4 - psrldq xmm4, 4 - mov word ptr[rdx+32*8], ax - mov word ptr[rdx+32*10], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*12], ax - mov word ptr[rdx+32*14], cx - - movd eax, xmm5 - movd ecx, xmm4 - psrldq xmm5, 4 - psrldq xmm4, 4 - mov word ptr[rdx+32*1], ax - mov word ptr[rdx+32*3], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*5], ax - mov word ptr[rdx+32*7], cx - movd eax, xmm5 - movd ecx, xmm4 - mov word ptr[rdx+32*9], ax - mov word ptr[rdx+32*11], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*13], ax - mov word ptr[rdx+32*15], cx - - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm b/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm deleted file mode 100644 index 6d5aaa19db..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm +++ /dev/null @@ -1,815 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro LF_ABS 2 - ; %1 value not preserved - ; %2 value preserved - ; output in %1 - movdqa scratch1, %2 ; v2 - - psubusb scratch1, %1 ; v2 - v1 - psubusb %1, %2 ; v1 - v2 - por %1, scratch1 ; abs(v2 - v1) -%endmacro - -%macro LF_FILTER_HEV_MASK 8-9 - - LF_ABS %1, %2 ; abs(p3 - p2) - LF_ABS %2, %3 ; abs(p2 - p1) - pmaxub %1, %2 ; accumulate mask -%if %0 == 8 - movdqa scratch2, %3 ; save p1 - LF_ABS scratch2, %4 ; abs(p1 - p0) -%endif - LF_ABS %4, %5 ; abs(p0 - q0) - LF_ABS %5, %6 ; abs(q0 - q1) -%if %0 == 8 - pmaxub %5, scratch2 ; accumulate hev -%else - pmaxub %5, %9 -%endif - pmaxub %1, %5 ; accumulate mask - - LF_ABS %3, %6 ; abs(p1 - q1) - LF_ABS %6, %7 ; abs(q1 - q2) - pmaxub %1, %6 ; accumulate mask - LF_ABS %7, %8 ; abs(q2 - q3) - pmaxub %1, %7 ; accumulate mask - - paddusb %4, %4 ; 2 * abs(p0 - q0) - pand %3, [GLOBAL(tfe)] - psrlw %3, 1 ; abs(p1 - q1) / 2 - paddusb %4, %3 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - - psubusb %1, [limit] - psubusb %4, [blimit] - por %1, %4 - pcmpeqb %1, zero ; mask - - psubusb %5, [thresh] - pcmpeqb %5, zero ; ~hev -%endmacro - -%macro LF_FILTER 6 - ; %1-%4: p1-q1 - ; %5: mask - ; %6: hev - - movdqa scratch2, %6 ; save hev - - pxor %1, [GLOBAL(t80)] ; ps1 - pxor %4, [GLOBAL(t80)] ; qs1 - movdqa scratch1, %1 - psubsb scratch1, %4 ; signed_char_clamp(ps1 - qs1) - pandn scratch2, scratch1 ; vp8_filter &= hev - - pxor %2, [GLOBAL(t80)] ; ps0 - pxor %3, [GLOBAL(t80)] ; qs0 - movdqa scratch1, %3 - psubsb scratch1, %2 ; qs0 - ps0 - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - pand %5, scratch2 ; &= mask - - movdqa scratch2, %5 - paddsb %5, [GLOBAL(t4)] ; Filter1 - paddsb scratch2, [GLOBAL(t3)] ; Filter2 - - ; Filter1 >> 3 - movdqa scratch1, zero - pcmpgtb scratch1, %5 - psrlw %5, 3 - pand scratch1, [GLOBAL(te0)] - pand %5, [GLOBAL(t1f)] - por %5, scratch1 - - psubsb %3, %5 ; qs0 - Filter1 - pxor %3, [GLOBAL(t80)] - - ; Filter2 >> 3 - movdqa scratch1, zero - pcmpgtb scratch1, scratch2 - psrlw scratch2, 3 - pand scratch1, [GLOBAL(te0)] - pand scratch2, [GLOBAL(t1f)] - por scratch2, scratch1 - - paddsb %2, scratch2 ; ps0 + Filter2 - pxor %2, [GLOBAL(t80)] - - ; outer tap adjustments - paddsb %5, [GLOBAL(t1)] - movdqa scratch1, zero - pcmpgtb scratch1, %5 - psrlw %5, 1 - pand scratch1, [GLOBAL(t80)] - pand %5, [GLOBAL(t7f)] - por %5, scratch1 - pand %5, %6 ; vp8_filter &= ~hev - - psubsb %4, %5 ; qs1 - vp8_filter - pxor %4, [GLOBAL(t80)] - - paddsb %1, %5 ; ps1 + vp8_filter - pxor %1, [GLOBAL(t80)] -%endmacro - -;void vp8_loop_filter_bh_y_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh -;) -global sym(vp8_loop_filter_bh_y_sse2) PRIVATE -sym(vp8_loop_filter_bh_y_sse2): - -%if LIBVPX_YASM_WIN64 - %define src rcx ; src_ptr - %define stride rdx ; src_pixel_step - %define blimit r8 - %define limit r9 - %define thresh r10 - - %define spp rax - %define stride3 r11 - %define stride5 r12 - %define stride7 r13 - - push rbp - mov rbp, rsp - SAVE_XMM 11 - push r12 - push r13 - mov thresh, arg(4) -%else - %define src rdi ; src_ptr - %define stride rsi ; src_pixel_step - %define blimit rdx - %define limit rcx - %define thresh r8 - - %define spp rax - %define stride3 r9 - %define stride5 r10 - %define stride7 r11 -%endif - - %define scratch1 xmm5 - %define scratch2 xmm6 - %define zero xmm7 - - %define i0 [src] - %define i1 [spp] - %define i2 [src + 2 * stride] - %define i3 [spp + 2 * stride] - %define i4 [src + 4 * stride] - %define i5 [spp + 4 * stride] - %define i6 [src + 2 * stride3] - %define i7 [spp + 2 * stride3] - %define i8 [src + 8 * stride] - %define i9 [spp + 8 * stride] - %define i10 [src + 2 * stride5] - %define i11 [spp + 2 * stride5] - %define i12 [src + 4 * stride3] - %define i13 [spp + 4 * stride3] - %define i14 [src + 2 * stride7] - %define i15 [spp + 2 * stride7] - - ; prep work - lea spp, [src + stride] - lea stride3, [stride + 2 * stride] - lea stride5, [stride3 + 2 * stride] - lea stride7, [stride3 + 4 * stride] - pxor zero, zero - - ; load the first set into registers - movdqa xmm0, i0 - movdqa xmm1, i1 - movdqa xmm2, i2 - movdqa xmm3, i3 - movdqa xmm4, i4 - movdqa xmm8, i5 - movdqa xmm9, i6 ; q2, will contain abs(p1-p0) - movdqa xmm10, i7 -LF_FILTER_HEV_MASK xmm0, xmm1, xmm2, xmm3, xmm4, xmm8, xmm9, xmm10 - - movdqa xmm1, i2 - movdqa xmm2, i3 - movdqa xmm3, i4 - movdqa xmm8, i5 -LF_FILTER xmm1, xmm2, xmm3, xmm8, xmm0, xmm4 - movdqa i2, xmm1 - movdqa i3, xmm2 - -; second set - movdqa i4, xmm3 - movdqa i5, xmm8 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm2, i8 - movdqa xmm4, i9 - movdqa xmm10, i10 ; q2, will contain abs(p1-p0) - movdqa xmm11, i11 -LF_FILTER_HEV_MASK xmm3, xmm8, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm9 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm4, i8 - movdqa xmm8, i9 -LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 - movdqa i6, xmm0 - movdqa i7, xmm1 - -; last set - movdqa i8, xmm4 - movdqa i9, xmm8 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm2, i12 - movdqa xmm3, i13 - movdqa xmm9, i14 ; q2, will contain abs(p1-p0) - movdqa xmm11, i15 -LF_FILTER_HEV_MASK xmm4, xmm8, xmm0, xmm1, xmm2, xmm3, xmm9, xmm11, xmm10 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm3, i12 - movdqa xmm8, i13 -LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - movdqa i10, xmm0 - movdqa i11, xmm1 - movdqa i12, xmm3 - movdqa i13, xmm8 - -%if LIBVPX_YASM_WIN64 - pop r13 - pop r12 - RESTORE_XMM - pop rbp -%endif - - ret - - -;void vp8_loop_filter_bv_y_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh -;) - -global sym(vp8_loop_filter_bv_y_sse2) PRIVATE -sym(vp8_loop_filter_bv_y_sse2): - -%if LIBVPX_YASM_WIN64 - %define src rcx ; src_ptr - %define stride rdx ; src_pixel_step - %define blimit r8 - %define limit r9 - %define thresh r10 - - %define spp rax - %define stride3 r11 - %define stride5 r12 - %define stride7 r13 - - push rbp - mov rbp, rsp - SAVE_XMM 15 - push r12 - push r13 - mov thresh, arg(4) -%else - %define src rdi - %define stride rsi - %define blimit rdx - %define limit rcx - %define thresh r8 - - %define spp rax - %define stride3 r9 - %define stride5 r10 - %define stride7 r11 -%endif - - %define scratch1 xmm5 - %define scratch2 xmm6 - %define zero xmm7 - - %define s0 [src] - %define s1 [spp] - %define s2 [src + 2 * stride] - %define s3 [spp + 2 * stride] - %define s4 [src + 4 * stride] - %define s5 [spp + 4 * stride] - %define s6 [src + 2 * stride3] - %define s7 [spp + 2 * stride3] - %define s8 [src + 8 * stride] - %define s9 [spp + 8 * stride] - %define s10 [src + 2 * stride5] - %define s11 [spp + 2 * stride5] - %define s12 [src + 4 * stride3] - %define s13 [spp + 4 * stride3] - %define s14 [src + 2 * stride7] - %define s15 [spp + 2 * stride7] - - %define i0 [rsp] - %define i1 [rsp + 16] - %define i2 [rsp + 32] - %define i3 [rsp + 48] - %define i4 [rsp + 64] - %define i5 [rsp + 80] - %define i6 [rsp + 96] - %define i7 [rsp + 112] - %define i8 [rsp + 128] - %define i9 [rsp + 144] - %define i10 [rsp + 160] - %define i11 [rsp + 176] - %define i12 [rsp + 192] - %define i13 [rsp + 208] - %define i14 [rsp + 224] - %define i15 [rsp + 240] - - ALIGN_STACK 16, rax - - ; reserve stack space - %define temp_storage 0 ; size is 256 (16*16) - %define stack_size 256 - sub rsp, stack_size - - ; prep work - lea spp, [src + stride] - lea stride3, [stride + 2 * stride] - lea stride5, [stride3 + 2 * stride] - lea stride7, [stride3 + 4 * stride] - - ; 8-f - movdqa xmm0, s8 - movdqa xmm1, xmm0 - punpcklbw xmm0, s9 ; 80 90 - punpckhbw xmm1, s9 ; 88 98 - - movdqa xmm2, s10 - movdqa xmm3, xmm2 - punpcklbw xmm2, s11 ; a0 b0 - punpckhbw xmm3, s11 ; a8 b8 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 80 90 a0 b0 - punpckhwd xmm4, xmm2 ; 84 94 a4 b4 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 88 98 a8 b8 - punpckhwd xmm2, xmm3 ; 8c 9c ac bc - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, s12 - movdqa xmm5, xmm3 - punpcklbw xmm3, s13 ; c0 d0 - punpckhbw xmm5, s13 ; c8 d8 - - movdqa xmm6, s14 - movdqa xmm7, xmm6 - punpcklbw xmm6, s15 ; e0 f0 - punpckhbw xmm7, s15 ; e8 f8 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 - punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 - punpckhwd xmm6, xmm7 ; cc dc ec fc - - ; pull the third and fourth sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 - punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 - punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 - punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc - punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe - - ; save the calculations. we only have 15 registers ... - movdqa i0, xmm0 - movdqa i1, xmm7 - movdqa i2, xmm4 - movdqa i3, xmm3 - movdqa i4, xmm1 - movdqa i5, xmm8 - movdqa i6, xmm2 - movdqa i7, xmm5 - - ; 0-7 - movdqa xmm0, s0 - movdqa xmm1, xmm0 - punpcklbw xmm0, s1 ; 00 10 - punpckhbw xmm1, s1 ; 08 18 - - movdqa xmm2, s2 - movdqa xmm3, xmm2 - punpcklbw xmm2, s3 ; 20 30 - punpckhbw xmm3, s3 ; 28 38 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 00 10 20 30 - punpckhwd xmm4, xmm2 ; 04 14 24 34 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 08 18 28 38 - punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, s4 - movdqa xmm5, xmm3 - punpcklbw xmm3, s5 ; 40 50 - punpckhbw xmm5, s5 ; 48 58 - - movdqa xmm6, s6 - movdqa xmm7, xmm6 - punpcklbw xmm6, s7 ; 60 70 - punpckhbw xmm7, s7 ; 68 78 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; 40 50 60 70 - punpckhwd xmm8, xmm6 ; 44 54 64 74 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; 48 58 68 78 - punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c - - ; pull the first two sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 - punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 - punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 - punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c - punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e - ; final combination - - movdqa xmm6, xmm0 - punpcklqdq xmm0, i0 - punpckhqdq xmm6, i0 - - movdqa xmm9, xmm7 - punpcklqdq xmm7, i1 - punpckhqdq xmm9, i1 - - movdqa xmm10, xmm4 - punpcklqdq xmm4, i2 - punpckhqdq xmm10, i2 - - movdqa xmm11, xmm3 - punpcklqdq xmm3, i3 - punpckhqdq xmm11, i3 - - movdqa xmm12, xmm1 - punpcklqdq xmm1, i4 - punpckhqdq xmm12, i4 - - movdqa xmm13, xmm8 - punpcklqdq xmm8, i5 - punpckhqdq xmm13, i5 - - movdqa xmm14, xmm2 - punpcklqdq xmm2, i6 - punpckhqdq xmm14, i6 - - movdqa xmm15, xmm5 - punpcklqdq xmm5, i7 - punpckhqdq xmm15, i7 - - movdqa i0, xmm0 - movdqa i1, xmm6 - movdqa i2, xmm7 - movdqa i3, xmm9 - movdqa i4, xmm4 - movdqa i5, xmm10 - movdqa i6, xmm3 - movdqa i7, xmm11 - movdqa i8, xmm1 - movdqa i9, xmm12 - movdqa i10, xmm8 - movdqa i11, xmm13 - movdqa i12, xmm2 - movdqa i13, xmm14 - movdqa i14, xmm5 - movdqa i15, xmm15 - -; TRANSPOSED DATA AVAILABLE ON THE STACK - - movdqa xmm12, xmm6 - movdqa xmm13, xmm7 - - pxor zero, zero - -LF_FILTER_HEV_MASK xmm0, xmm12, xmm13, xmm9, xmm4, xmm10, xmm3, xmm11 - - movdqa xmm1, i2 - movdqa xmm2, i3 - movdqa xmm8, i4 - movdqa xmm9, i5 -LF_FILTER xmm1, xmm2, xmm8, xmm9, xmm0, xmm4 - movdqa i2, xmm1 - movdqa i3, xmm2 - -; second set - movdqa i4, xmm8 - movdqa i5, xmm9 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm2, i8 - movdqa xmm4, i9 - movdqa xmm10, i10 ; q2, will contain abs(p1-p0) - movdqa xmm11, i11 -LF_FILTER_HEV_MASK xmm8, xmm9, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm3 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm3, i8 - movdqa xmm4, i9 -LF_FILTER xmm0, xmm1, xmm3, xmm4, xmm8, xmm2 - movdqa i6, xmm0 - movdqa i7, xmm1 - -; last set - movdqa i8, xmm3 - movdqa i9, xmm4 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm2, i12 - movdqa xmm8, i13 - movdqa xmm9, i14 ; q2, will contain abs(p1-p0) - movdqa xmm11, i15 -LF_FILTER_HEV_MASK xmm3, xmm4, xmm0, xmm1, xmm2, xmm8, xmm9, xmm11, xmm10 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm4, i12 - movdqa xmm8, i13 -LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 - movdqa i10, xmm0 - movdqa i11, xmm1 - movdqa i12, xmm4 - movdqa i13, xmm8 - - -; RESHUFFLE AND WRITE OUT - ; 8-f - movdqa xmm0, i8 - movdqa xmm1, xmm0 - punpcklbw xmm0, i9 ; 80 90 - punpckhbw xmm1, i9 ; 88 98 - - movdqa xmm2, i10 - movdqa xmm3, xmm2 - punpcklbw xmm2, i11 ; a0 b0 - punpckhbw xmm3, i11 ; a8 b8 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 80 90 a0 b0 - punpckhwd xmm4, xmm2 ; 84 94 a4 b4 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 88 98 a8 b8 - punpckhwd xmm2, xmm3 ; 8c 9c ac bc - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, i12 - movdqa xmm5, xmm3 - punpcklbw xmm3, i13 ; c0 d0 - punpckhbw xmm5, i13 ; c8 d8 - - movdqa xmm6, i14 - movdqa xmm7, xmm6 - punpcklbw xmm6, i15 ; e0 f0 - punpckhbw xmm7, i15 ; e8 f8 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 - punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 - punpckhwd xmm6, xmm7 ; cc dc ec fc - - ; pull the third and fourth sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 - punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 - punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 - punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc - punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe - - ; save the calculations. we only have 15 registers ... - movdqa i8, xmm0 - movdqa i9, xmm7 - movdqa i10, xmm4 - movdqa i11, xmm3 - movdqa i12, xmm1 - movdqa i13, xmm8 - movdqa i14, xmm2 - movdqa i15, xmm5 - - ; 0-7 - movdqa xmm0, i0 - movdqa xmm1, xmm0 - punpcklbw xmm0, i1 ; 00 10 - punpckhbw xmm1, i1 ; 08 18 - - movdqa xmm2, i2 - movdqa xmm3, xmm2 - punpcklbw xmm2, i3 ; 20 30 - punpckhbw xmm3, i3 ; 28 38 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 00 10 20 30 - punpckhwd xmm4, xmm2 ; 04 14 24 34 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 08 18 28 38 - punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, i4 - movdqa xmm5, xmm3 - punpcklbw xmm3, i5 ; 40 50 - punpckhbw xmm5, i5 ; 48 58 - - movdqa xmm6, i6 - movdqa xmm7, xmm6 - punpcklbw xmm6, i7 ; 60 70 - punpckhbw xmm7, i7 ; 68 78 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; 40 50 60 70 - punpckhwd xmm8, xmm6 ; 44 54 64 74 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; 48 58 68 78 - punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c - - ; pull the first two sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 - punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 - punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 - punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c - punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e - ; final combination - - movdqa xmm6, xmm0 - punpcklqdq xmm0, i8 - punpckhqdq xmm6, i8 - - movdqa xmm9, xmm7 - punpcklqdq xmm7, i9 - punpckhqdq xmm9, i9 - - movdqa xmm10, xmm4 - punpcklqdq xmm4, i10 - punpckhqdq xmm10, i10 - - movdqa xmm11, xmm3 - punpcklqdq xmm3, i11 - punpckhqdq xmm11, i11 - - movdqa xmm12, xmm1 - punpcklqdq xmm1, i12 - punpckhqdq xmm12, i12 - - movdqa xmm13, xmm8 - punpcklqdq xmm8, i13 - punpckhqdq xmm13, i13 - - movdqa xmm14, xmm2 - punpcklqdq xmm2, i14 - punpckhqdq xmm14, i14 - - movdqa xmm15, xmm5 - punpcklqdq xmm5, i15 - punpckhqdq xmm15, i15 - - movdqa s0, xmm0 - movdqa s1, xmm6 - movdqa s2, xmm7 - movdqa s3, xmm9 - movdqa s4, xmm4 - movdqa s5, xmm10 - movdqa s6, xmm3 - movdqa s7, xmm11 - movdqa s8, xmm1 - movdqa s9, xmm12 - movdqa s10, xmm8 - movdqa s11, xmm13 - movdqa s12, xmm2 - movdqa s13, xmm14 - movdqa s14, xmm5 - movdqa s15, xmm15 - - ; free stack space - add rsp, stack_size - - ; un-ALIGN_STACK - pop rsp - -%if LIBVPX_YASM_WIN64 - pop r13 - pop r12 - RESTORE_XMM - pop rbp -%endif - - ret - -SECTION_RODATA -align 16 -te0: - times 16 db 0xe0 -align 16 -t7f: - times 16 db 0x7f -align 16 -tfe: - times 16 db 0xfe -align 16 -t1f: - times 16 db 0x1f -align 16 -t80: - times 16 db 0x80 -align 16 -t1: - times 16 db 0x01 -align 16 -t3: - times 16 db 0x03 -align 16 -t4: - times 16 db 0x04 diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm b/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm deleted file mode 100644 index 1913abc69b..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm +++ /dev/null @@ -1,1640 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -%define _t0 0 -%define _t1 _t0 + 16 -%define _p3 _t1 + 16 -%define _p2 _p3 + 16 -%define _p1 _p2 + 16 -%define _p0 _p1 + 16 -%define _q0 _p0 + 16 -%define _q1 _q0 + 16 -%define _q2 _q1 + 16 -%define _q3 _q2 + 16 -%define lf_var_size 160 - -; Use of pmaxub instead of psubusb to compute filter mask was seen -; in ffvp8 - -%macro LFH_FILTER_AND_HEV_MASK 1 -%if %1 - movdqa xmm2, [rdi+2*rax] ; q3 - movdqa xmm1, [rsi+2*rax] ; q2 - movdqa xmm4, [rsi+rax] ; q1 - movdqa xmm5, [rsi] ; q0 - neg rax ; negate pitch to deal with above border -%else - movlps xmm2, [rsi + rcx*2] ; q3 - movlps xmm1, [rsi + rcx] ; q2 - movlps xmm4, [rsi] ; q1 - movlps xmm5, [rsi + rax] ; q0 - - movhps xmm2, [rdi + rcx*2] - movhps xmm1, [rdi + rcx] - movhps xmm4, [rdi] - movhps xmm5, [rdi + rax] - - lea rsi, [rsi + rax*4] - lea rdi, [rdi + rax*4] - - movdqa [rsp+_q2], xmm1 ; store q2 - movdqa [rsp+_q1], xmm4 ; store q1 -%endif - movdqa xmm7, [rdx] ;limit - - movdqa xmm6, xmm1 ; q2 - movdqa xmm3, xmm4 ; q1 - - psubusb xmm1, xmm2 ; q2-=q3 - psubusb xmm2, xmm6 ; q3-=q2 - - psubusb xmm4, xmm6 ; q1-=q2 - psubusb xmm6, xmm3 ; q2-=q1 - - por xmm4, xmm6 ; abs(q2-q1) - por xmm1, xmm2 ; abs(q3-q2) - - movdqa xmm0, xmm5 ; q0 - pmaxub xmm1, xmm4 - - psubusb xmm5, xmm3 ; q0-=q1 - psubusb xmm3, xmm0 ; q1-=q0 - - por xmm5, xmm3 ; abs(q0-q1) - movdqa [rsp+_t0], xmm5 ; save to t0 - - pmaxub xmm1, xmm5 - -%if %1 - movdqa xmm2, [rsi+4*rax] ; p3 - movdqa xmm4, [rdi+4*rax] ; p2 - movdqa xmm6, [rsi+2*rax] ; p1 -%else - movlps xmm2, [rsi + rax] ; p3 - movlps xmm4, [rsi] ; p2 - movlps xmm6, [rsi + rcx] ; p1 - - movhps xmm2, [rdi + rax] - movhps xmm4, [rdi] - movhps xmm6, [rdi + rcx] - - movdqa [rsp+_p2], xmm4 ; store p2 - movdqa [rsp+_p1], xmm6 ; store p1 -%endif - - movdqa xmm5, xmm4 ; p2 - movdqa xmm3, xmm6 ; p1 - - psubusb xmm4, xmm2 ; p2-=p3 - psubusb xmm2, xmm5 ; p3-=p2 - - psubusb xmm3, xmm5 ; p1-=p2 - pmaxub xmm1, xmm4 ; abs(p3 - p2) - - psubusb xmm5, xmm6 ; p2-=p1 - pmaxub xmm1, xmm2 ; abs(p3 - p2) - - pmaxub xmm1, xmm5 ; abs(p2 - p1) - movdqa xmm2, xmm6 ; p1 - - pmaxub xmm1, xmm3 ; abs(p2 - p1) -%if %1 - movdqa xmm4, [rsi+rax] ; p0 - movdqa xmm3, [rdi] ; q1 -%else - movlps xmm4, [rsi + rcx*2] ; p0 - movhps xmm4, [rdi + rcx*2] - movdqa xmm3, [rsp+_q1] ; q1 -%endif - - movdqa xmm5, xmm4 ; p0 - psubusb xmm4, xmm6 ; p0-=p1 - - psubusb xmm6, xmm5 ; p1-=p0 - - por xmm6, xmm4 ; abs(p1 - p0) - mov rdx, arg(2) ; get blimit - - movdqa [rsp+_t1], xmm6 ; save to t1 - - movdqa xmm4, xmm3 ; q1 - pmaxub xmm1, xmm6 - - psubusb xmm3, xmm2 ; q1-=p1 - psubusb xmm2, xmm4 ; p1-=q1 - - psubusb xmm1, xmm7 - por xmm2, xmm3 ; abs(p1-q1) - - movdqa xmm7, [rdx] ; blimit - mov rdx, arg(4) ; hev get thresh - - movdqa xmm3, xmm0 ; q0 - pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - - movdqa xmm6, xmm5 ; p0 - psrlw xmm2, 1 ; abs(p1-q1)/2 - - psubusb xmm5, xmm3 ; p0-=q0 - psubusb xmm3, xmm6 ; q0-=p0 - por xmm5, xmm3 ; abs(p0 - q0) - - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - - movdqa xmm4, [rsp+_t0] ; hev get abs (q1 - q0) - movdqa xmm3, [rsp+_t1] ; get abs (p1 - p0) - - paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - movdqa xmm2, [rdx] ; hev - - psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - psubusb xmm4, xmm2 ; hev - - psubusb xmm3, xmm2 ; hev - por xmm1, xmm5 - - pxor xmm7, xmm7 - paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb xmm4, xmm5 ; hev - pcmpeqb xmm3, xmm3 ; hev - - pcmpeqb xmm1, xmm7 ; mask xmm1 - pxor xmm4, xmm3 ; hev -%endmacro - -%macro B_FILTER 1 - movdqa xmm3, [GLOBAL(t80)] -%if %1 == 0 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm7, [rsp+_q1] ; q1 -%elif %1 == 1 - movdqa xmm2, [rsi+2*rax] ; p1 - movdqa xmm7, [rdi] ; q1 -%elif %1 == 2 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm6, [rsp+_p0] ; p0 - movdqa xmm0, [rsp+_q0] ; q0 - movdqa xmm7, [rsp+_q1] ; q1 -%endif - - pxor xmm2, xmm3 ; p1 offset to convert to signed values - pxor xmm7, xmm3 ; q1 offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, xmm3 ; offset to convert to signed values - - pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) - pxor xmm0, xmm3 ; offset to convert to signed values - - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand xmm1, xmm2 ; mask filter values we don't care about - - movdqa xmm2, xmm1 - paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - - punpckhbw xmm5, xmm2 ; axbxcxdx - punpcklbw xmm2, xmm2 ; exfxgxhx - - punpcklbw xmm0, xmm1 ; exfxgxhx - psraw xmm5, 11 ; sign extended shift right by 3 - - punpckhbw xmm1, xmm1 ; axbxcxdx - psraw xmm2, 11 ; sign extended shift right by 3 - - packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - psraw xmm0, 11 ; sign extended shift right by 3 - - psraw xmm1, 11 ; sign extended shift right by 3 - movdqa xmm5, xmm0 ; save results - - packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - - paddsb xmm6, xmm2 ; p0+= p0 add - - movdqa xmm2, [GLOBAL(ones)] - paddsw xmm5, xmm2 - paddsw xmm1, xmm2 - psraw xmm5, 1 ; partial shifted one more time for 2nd tap - psraw xmm1, 1 ; partial shifted one more time for 2nd tap - packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - movdqa xmm2, [GLOBAL(t80)] - -%if %1 == 0 - movdqa xmm1, [rsp+_p1] ; p1 - lea rsi, [rsi + rcx*2] - lea rdi, [rdi + rcx*2] -%elif %1 == 1 - movdqa xmm1, [rsi+2*rax] ; p1 -%elif %1 == 2 - movdqa xmm1, [rsp+_p1] ; p1 -%endif - - pandn xmm4, xmm5 ; high edge variance additive - pxor xmm6, xmm2 ; unoffset - - pxor xmm1, xmm2 ; reoffset - psubsb xmm3, xmm0 ; q0-= q0 add - - paddsb xmm1, xmm4 ; p1+= p1 add - pxor xmm3, xmm2 ; unoffset - - pxor xmm1, xmm2 ; unoffset - psubsb xmm7, xmm4 ; q1-= q1 add - - pxor xmm7, xmm2 ; unoffset -%if %1 == 0 - movq [rsi], xmm6 ; p0 - movhps [rdi], xmm6 - movq [rsi + rax], xmm1 ; p1 - movhps [rdi + rax], xmm1 - movq [rsi + rcx], xmm3 ; q0 - movhps [rdi + rcx], xmm3 - movq [rsi + rcx*2], xmm7 ; q1 - movhps [rdi + rcx*2], xmm7 -%elif %1 == 1 - movdqa [rsi+rax], xmm6 ; write back - movdqa [rsi+2*rax], xmm1 ; write back - movdqa [rsi], xmm3 ; write back - movdqa [rdi], xmm7 ; write back -%endif - -%endmacro - -%if ABI_IS_32BIT - -;void vp8_loop_filter_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE -sym(vp8_loop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step - - mov rdx, arg(3) ;limit - - lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 1 - ; filter and write back the result - B_FILTER 1 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -%endif - -;void vp8_loop_filter_horizontal_edge_uv_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE -sym(vp8_loop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u - mov rdi, arg(5) ; v - movsxd rax, dword ptr arg(1) ; src_pixel_step - mov rcx, rax - neg rax ; negate pitch to deal with above border - - mov rdx, arg(3) ;limit - - lea rsi, [rsi + rcx] - lea rdi, [rdi + rcx] - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 0 - ; filter and write back the result - B_FILTER 0 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -%macro MB_FILTER_AND_WRITEBACK 1 - movdqa xmm3, [GLOBAL(t80)] -%if %1 == 0 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm7, [rsp+_q1] ; q1 -%elif %1 == 1 - movdqa xmm2, [rsi+2*rax] ; p1 - movdqa xmm7, [rdi] ; q1 - - mov rcx, rax - neg rcx -%elif %1 == 2 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm6, [rsp+_p0] ; p0 - movdqa xmm0, [rsp+_q0] ; q0 - movdqa xmm7, [rsp+_q1] ; q1 -%endif - - pxor xmm2, xmm3 ; p1 offset to convert to signed values - pxor xmm7, xmm3 ; q1 offset to convert to signed values - pxor xmm6, xmm3 ; offset to convert to signed values - pxor xmm0, xmm3 ; offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb xmm2, xmm0 ; 2 * (q0 - p0) - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) - pand xmm1, xmm2 ; mask filter values we don't care about - - movdqa xmm2, xmm1 ; vp8_filter - - pand xmm2, xmm4 ; Filter2 = vp8_filter & hev - pxor xmm0, xmm0 - - pandn xmm4, xmm1 ; vp8_filter&=~hev - pxor xmm1, xmm1 - - punpcklbw xmm0, xmm4 ; Filter 2 (hi) - punpckhbw xmm1, xmm4 ; Filter 2 (lo) - - movdqa xmm5, xmm2 - - movdqa xmm4, [GLOBAL(s9)] - paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) - paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - - pmulhw xmm1, xmm4 ; Filter 2 (lo) * 9 - pmulhw xmm0, xmm4 ; Filter 2 (hi) * 9 - - punpckhbw xmm7, xmm5 ; axbxcxdx - punpcklbw xmm5, xmm5 ; exfxgxhx - - psraw xmm7, 11 ; sign extended shift right by 3 - - psraw xmm5, 11 ; sign extended shift right by 3 - punpckhbw xmm4, xmm2 ; axbxcxdx - - punpcklbw xmm2, xmm2 ; exfxgxhx - psraw xmm4, 11 ; sign extended shift right by 3 - - packsswb xmm5, xmm7 ; Filter2 >>=3; - psraw xmm2, 11 ; sign extended shift right by 3 - - packsswb xmm2, xmm4 ; Filter1 >>=3; - - paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 - - psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1 - movdqa xmm7, xmm1 - - movdqa xmm4, [GLOBAL(s63)] - movdqa xmm5, xmm0 - movdqa xmm2, xmm5 - paddw xmm0, xmm4 ; Filter 2 (hi) * 9 + 63 - paddw xmm1, xmm4 ; Filter 2 (lo) * 9 + 63 - movdqa xmm4, xmm7 - - paddw xmm5, xmm5 ; Filter 2 (hi) * 18 - - paddw xmm7, xmm7 ; Filter 2 (lo) * 18 - paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63 - - paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63 - paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63 - psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7 - - paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63 - psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7 - psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7 - - packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - - psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7 - psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7 - psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7 - - packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - movdqa xmm7, [GLOBAL(t80)] - -%if %1 == 0 - movdqa xmm1, [rsp+_q1] ; q1 - movdqa xmm4, [rsp+_p1] ; p1 - lea rsi, [rsi+rcx*2] - lea rdi, [rdi+rcx*2] - -%elif %1 == 1 - movdqa xmm1, [rdi] ; q1 - movdqa xmm4, [rsi+rax*2] ; p1 -%elif %1 == 2 - movdqa xmm4, [rsp+_p1] ; p1 - movdqa xmm1, [rsp+_q1] ; q1 -%endif - - pxor xmm1, xmm7 - pxor xmm4, xmm7 - - psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3) - paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 - u3) - psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) - paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2) - -%if %1 == 1 - movdqa xmm2, [rdi+rax*4] ; p2 - movdqa xmm5, [rdi+rcx] ; q2 -%else - movdqa xmm2, [rsp+_p2] ; p2 - movdqa xmm5, [rsp+_q2] ; q2 -%endif - - pxor xmm1, xmm7 ; *oq1 = sq^0x80; - pxor xmm4, xmm7 ; *op1 = sp^0x80; - pxor xmm2, xmm7 - pxor xmm5, xmm7 - paddsb xmm2, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u) - psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) - pxor xmm2, xmm7 ; *op2 = sp^0x80; - pxor xmm5, xmm7 ; *oq2 = sq^0x80; - pxor xmm3, xmm7 ; *oq0 = sq^0x80 - pxor xmm6, xmm7 ; *oq0 = sp^0x80 -%if %1 == 0 - movq [rsi], xmm6 ; p0 - movhps [rdi], xmm6 - movq [rsi + rcx], xmm3 ; q0 - movhps [rdi + rcx], xmm3 - lea rdx, [rcx + rcx*2] - movq [rsi+rcx*2], xmm1 ; q1 - movhps [rdi+rcx*2], xmm1 - - movq [rsi + rax], xmm4 ; p1 - movhps [rdi + rax], xmm4 - - movq [rsi+rax*2], xmm2 ; p2 - movhps [rdi+rax*2], xmm2 - - movq [rsi+rdx], xmm5 ; q2 - movhps [rdi+rdx], xmm5 -%elif %1 == 1 - movdqa [rdi+rcx], xmm5 ; q2 - movdqa [rdi], xmm1 ; q1 - movdqa [rsi], xmm3 ; q0 - movdqa [rsi+rax ], xmm6 ; p0 - movdqa [rsi+rax*2], xmm4 ; p1 - movdqa [rdi+rax*4], xmm2 ; p2 -%elif %1 == 2 - movdqa [rsp+_p1], xmm4 ; p1 - movdqa [rsp+_p0], xmm6 ; p0 - movdqa [rsp+_q0], xmm3 ; q0 - movdqa [rsp+_q1], xmm1 ; q1 -%endif - -%endmacro - - -;void vp8_mbloop_filter_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE -sym(vp8_mbloop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step - mov rdx, arg(3) ;limit - - lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 1 - ; filter and write back the results - MB_FILTER_AND_WRITEBACK 1 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_horizontal_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE -sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u - mov rdi, arg(5) ; v - movsxd rax, dword ptr arg(1) ; src_pixel_step - mov rcx, rax - neg rax ; negate pitch to deal with above border - mov rdx, arg(3) ;limit - - lea rsi, [rsi + rcx] - lea rdi, [rdi + rcx] - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 0 - ; filter and write back the results - MB_FILTER_AND_WRITEBACK 0 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -%macro TRANSPOSE_16X8 2 - movq xmm4, [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 - movq xmm1, [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 - movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20 - movq xmm7, [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30 - movq xmm5, [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40 - movq xmm2, [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50 - - punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 - - movq xmm1, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70 - - movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 - punpcklbw xmm0, xmm7 ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20 - - movq xmm7, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60 - - punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 -%if %1 - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] -%else - mov rsi, arg(5) ; v_ptr -%endif - - movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 - punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 - punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 - punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - -%if %1 == 0 - lea rdi, [rsi + rax - 4] ; rdi points to row +1 for indirect addressing - lea rsi, [rsi - 4] -%endif - - movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - - movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - - punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - - punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - - movdqa [rsp+_t0], xmm2 ; save to free XMM2 - - movq xmm2, [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80 - movq xmm6, [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90 - movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0 - movq xmm5, [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0 - movq xmm1, [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0 - - punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - - movq xmm6, [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0 - - punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 - - movq xmm5, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0 - - punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0 - - movq xmm6, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0 - - punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 - - movdqa xmm6, xmm1 ; - punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 - - punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - - punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - - punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - movdqa xmm0, xmm5 - punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - - punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 - - punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 - movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - - punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - - punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 - -%if %2 == 0 - movdqa [rsp+_q3], xmm7 ; save 7 - movdqa [rsp+_q2], xmm6 ; save 6 -%endif - movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - movdqa [rsp+_p1], xmm2 ; save 2 - - movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - movdqa [rsp+_p0], xmm3 ; save 3 - - punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - - movdqa [rsp+_q0], xmm4 ; save 4 - movdqa [rsp+_q1], xmm5 ; save 5 - movdqa xmm1, [rsp+_t0] - - movdqa xmm2, xmm1 ; - punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - -%if %2 == 0 - movdqa [rsp+_p2], xmm1 - movdqa [rsp+_p3], xmm2 -%endif - -%endmacro - -%macro LFV_FILTER_MASK_HEV_MASK 0 - movdqa xmm0, xmm6 ; q2 - psubusb xmm0, xmm7 ; q2-q3 - - psubusb xmm7, xmm6 ; q3-q2 - movdqa xmm4, xmm5 ; q1 - - por xmm7, xmm0 ; abs (q3-q2) - psubusb xmm4, xmm6 ; q1-q2 - - movdqa xmm0, xmm1 - psubusb xmm6, xmm5 ; q2-q1 - - por xmm6, xmm4 ; abs (q2-q1) - psubusb xmm0, xmm2 ; p2 - p3; - - psubusb xmm2, xmm1 ; p3 - p2; - por xmm0, xmm2 ; abs(p2-p3) - - movdqa xmm5, [rsp+_p1] ; p1 - pmaxub xmm0, xmm7 - - movdqa xmm2, xmm5 ; p1 - psubusb xmm5, xmm1 ; p1-p2 - psubusb xmm1, xmm2 ; p2-p1 - - movdqa xmm7, xmm3 ; p0 - psubusb xmm7, xmm2 ; p0-p1 - - por xmm1, xmm5 ; abs(p2-p1) - pmaxub xmm0, xmm6 - - pmaxub xmm0, xmm1 - movdqa xmm1, xmm2 ; p1 - - psubusb xmm2, xmm3 ; p1-p0 - - por xmm2, xmm7 ; abs(p1-p0) - - pmaxub xmm0, xmm2 - - movdqa xmm5, [rsp+_q0] ; q0 - movdqa xmm7, [rsp+_q1] ; q1 - - mov rdx, arg(3) ; limit - - movdqa xmm6, xmm5 ; q0 - movdqa xmm4, xmm7 ; q1 - - psubusb xmm5, xmm7 ; q0-q1 - psubusb xmm7, xmm6 ; q1-q0 - - por xmm7, xmm5 ; abs(q1-q0) - - pmaxub xmm0, xmm7 - - psubusb xmm0, [rdx] ; limit - - mov rdx, arg(2) ; blimit - movdqa xmm5, xmm4 ; q1 - - psubusb xmm5, xmm1 ; q1-=p1 - psubusb xmm1, xmm4 ; p1-=q1 - - por xmm5, xmm1 ; abs(p1-q1) - movdqa xmm1, xmm3 ; p0 - - pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psubusb xmm1, xmm6 ; p0-q0 - - movdqa xmm4, [rdx] ; blimit - mov rdx, arg(4) ; get thresh - - psrlw xmm5, 1 ; abs(p1-q1)/2 - psubusb xmm6, xmm3 ; q0-p0 - - por xmm1, xmm6 ; abs(q0-p0) - paddusb xmm1, xmm1 ; abs(q0-p0)*2 - movdqa xmm3, [rdx] - - paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm2, xmm3 ; abs(q1 - q0) > thresh - - psubusb xmm7, xmm3 ; abs(p1 - p0)> thresh - - psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por xmm2, xmm7 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - por xmm1, xmm0 ; mask - pcmpeqb xmm2, xmm0 - - pxor xmm0, xmm0 - pcmpeqb xmm4, xmm4 - - pcmpeqb xmm1, xmm0 - pxor xmm4, xmm2 -%endmacro - -%macro BV_TRANSPOSE 0 - ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - - movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - - punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - - movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 - - punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 - movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 - - punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 - ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 - ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 - ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 - ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 -%endmacro - -%macro BV_WRITEBACK 2 - movd [rsi+2], %1 - movd [rsi+4*rax+2], %2 - psrldq %1, 4 - psrldq %2, 4 - movd [rdi+2], %1 - movd [rdi+4*rax+2], %2 - psrldq %1, 4 - psrldq %2, 4 - movd [rsi+2*rax+2], %1 - movd [rsi+2*rcx+2], %2 - psrldq %1, 4 - psrldq %2, 4 - movd [rdi+2*rax+2], %1 - movd [rdi+2*rcx+2], %2 -%endmacro - -%if ABI_IS_32BIT - -;void vp8_loop_filter_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE -sym(vp8_loop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; src_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax*2+rax] - - ;transpose 16x8 to 8x16, and store the 8-line result on stack. - TRANSPOSE_16X8 1, 1 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - ; start work on filters - B_FILTER 2 - - ; transpose and write back - only work on q1, q0, p0, p1 - BV_TRANSPOSE - ; store 16-line result - - lea rdx, [rax] - neg rdx - - BV_WRITEBACK xmm1, xmm5 - - lea rsi, [rsi+rdx*8] - lea rdi, [rdi+rdx*8] - BV_WRITEBACK xmm2, xmm6 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -%endif - -;void vp8_loop_filter_vertical_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE -sym(vp8_loop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax+2*rax] - - ;transpose 16x8 to 8x16, and store the 8-line result on stack. - TRANSPOSE_16X8 0, 1 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - ; start work on filters - B_FILTER 2 - - ; transpose and write back - only work on q1, q0, p0, p1 - BV_TRANSPOSE - - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - - ; store 16-line result - BV_WRITEBACK xmm1, xmm5 - - mov rsi, arg(0) ; u_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - BV_WRITEBACK xmm2, xmm6 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -%macro MBV_TRANSPOSE 0 - movdqa xmm0, [rsp+_p3] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - - punpcklbw xmm0, xmm2 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm1, xmm2 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - movdqa xmm7, [rsp+_p1] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - movdqa xmm6, xmm7 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - - punpcklbw xmm7, [rsp+_p0] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm6, [rsp+_p0] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpcklwd xmm0, xmm7 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckhwd xmm3, xmm7 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpcklbw xmm7, [rsp+_q1] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - - movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - punpcklbw xmm6, [rsp+_q3] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06 - - movdqa xmm2, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - punpcklwd xmm7, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 - - punpckhwd xmm2, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 - movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckldq xmm0, xmm7 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 - punpckhdq xmm6, xmm7 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 -%endmacro - -%macro MBV_WRITEBACK_1 0 - movq [rsi], xmm0 - movhps [rdi], xmm0 - - movq [rsi+2*rax], xmm6 - movhps [rdi+2*rax], xmm6 - - movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - punpckldq xmm0, xmm2 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 - punpckhdq xmm3, xmm2 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 - - movq [rsi+4*rax], xmm0 - movhps [rdi+4*rax], xmm0 - - movq [rsi+2*rcx], xmm3 - movhps [rdi+2*rcx], xmm3 - - movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm7, [rsp+_q1] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - punpckhbw xmm5, [rsp+_q3] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86 - - movdqa xmm0, xmm7 - punpcklwd xmm0, xmm5 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 - punpckhwd xmm7, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 - - movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80 - punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 -%endmacro - -%macro MBV_WRITEBACK_2 0 - movq [rsi], xmm1 - movhps [rdi], xmm1 - - movq [rsi+2*rax], xmm5 - movhps [rdi+2*rax], xmm5 - - movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - punpckldq xmm1, xmm7 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0 - punpckhdq xmm4, xmm7 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 - - movq [rsi+4*rax], xmm1 - movhps [rdi+4*rax], xmm1 - - movq [rsi+2*rcx], xmm4 - movhps [rdi+2*rcx], xmm4 -%endmacro - - -;void vp8_mbloop_filter_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE -sym(vp8_mbloop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; src_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax*2+rax] - - ; Transpose - TRANSPOSE_16X8 1, 0 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - neg rax - ; start work on filters - MB_FILTER_AND_WRITEBACK 2 - - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] - - ; transpose and write back - MBV_TRANSPOSE - - neg rax - - MBV_WRITEBACK_1 - - - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] - MBV_WRITEBACK_2 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_vertical_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE -sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax+2*rax] - - ; Transpose - TRANSPOSE_16X8 0, 0 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - ; start work on filters - MB_FILTER_AND_WRITEBACK 2 - - ; transpose and write back - MBV_TRANSPOSE - - mov rsi, arg(0) ;u_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] - MBV_WRITEBACK_1 - mov rsi, arg(5) ;v_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] - MBV_WRITEBACK_2 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -;) -global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE -sym(vp8_loop_filter_simple_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx - ; end prolog - - mov rcx, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - movdqa xmm6, [GLOBAL(tfe)] - lea rdx, [rcx + rax] - neg rax - - ; calculate mask - movdqa xmm0, [rdx] ; q1 - mov rdx, arg(2) ;blimit - movdqa xmm1, [rcx+2*rax] ; p1 - - movdqa xmm2, xmm1 - movdqa xmm3, xmm0 - - psubusb xmm0, xmm1 ; q1-=p1 - psubusb xmm1, xmm3 ; p1-=q1 - por xmm1, xmm0 ; abs(p1-q1) - pand xmm1, xmm6 ; set lsb of each byte to zero - psrlw xmm1, 1 ; abs(p1-q1)/2 - - movdqa xmm7, XMMWORD PTR [rdx] - - movdqa xmm5, [rcx+rax] ; p0 - movdqa xmm4, [rcx] ; q0 - movdqa xmm0, xmm4 ; q0 - movdqa xmm6, xmm5 ; p0 - psubusb xmm5, xmm4 ; p0-=q0 - psubusb xmm4, xmm6 ; q0-=p0 - por xmm5, xmm4 ; abs(p0 - q0) - - movdqa xmm4, [GLOBAL(t80)] - - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor xmm7, xmm7 - pcmpeqb xmm5, xmm7 - - - ; start work on filters - pxor xmm2, xmm4 ; p1 offset to convert to signed values - pxor xmm3, xmm4 ; q1 offset to convert to signed values - psubsb xmm2, xmm3 ; p1 - q1 - - pxor xmm6, xmm4 ; offset to convert to signed values - pxor xmm0, xmm4 ; offset to convert to signed values - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) - paddsb xmm2, xmm0 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) - pand xmm5, xmm2 ; mask filter values we don't care about - - movdqa xmm0, xmm5 - paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 - paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 - - movdqa xmm1, [GLOBAL(te0)] - movdqa xmm2, [GLOBAL(t1f)] - -; pxor xmm7, xmm7 - pcmpgtb xmm7, xmm0 ;save sign - pand xmm7, xmm1 ;preserve the upper 3 bits - psrlw xmm0, 3 - pand xmm0, xmm2 ;clear out upper 3 bits - por xmm0, xmm7 ;add sign - psubsb xmm3, xmm0 ; q0-= q0sz add - - pxor xmm7, xmm7 - pcmpgtb xmm7, xmm5 ;save sign - pand xmm7, xmm1 ;preserve the upper 3 bits - psrlw xmm5, 3 - pand xmm5, xmm2 ;clear out upper 3 bits - por xmm5, xmm7 ;add sign - paddsb xmm6, xmm5 ; p0+= p0 add - - pxor xmm3, xmm4 ; unoffset - movdqa [rcx], xmm3 ; write back - - pxor xmm6, xmm4 ; unoffset - movdqa [rcx+rax], xmm6 ; write back - - ; begin epilog - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -;) -global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE -sym(vp8_loop_filter_simple_vertical_edge_sse2): - push rbp ; save old base pointer value. - mov rbp, rsp ; set new base pointer value. - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx ; save callee-saved reg - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi - 2 ] - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd xmm0, [rsi] ; (high 96 bits unused) 03 02 01 00 - movd xmm1, [rdx] ; (high 96 bits unused) 43 42 41 40 - movd xmm2, [rdi] ; 13 12 11 10 - movd xmm3, [rcx] ; 53 52 51 50 - punpckldq xmm0, xmm1 ; (high 64 bits unused) 43 42 41 40 03 02 01 00 - punpckldq xmm2, xmm3 ; 53 52 51 50 13 12 11 10 - - movd xmm4, [rsi + rax*2] ; 23 22 21 20 - movd xmm5, [rdx + rax*2] ; 63 62 61 60 - movd xmm6, [rdi + rax*2] ; 33 32 31 30 - movd xmm7, [rcx + rax*2] ; 73 72 71 70 - punpckldq xmm4, xmm5 ; 63 62 61 60 23 22 21 20 - punpckldq xmm6, xmm7 ; 73 72 71 70 33 32 31 30 - - punpcklbw xmm0, xmm2 ; 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 - punpcklbw xmm4, xmm6 ; 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 - - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm4 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm4 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - - movdqa xmm2, xmm0 - punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - lea rsi, [rsi + rax*8] - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd xmm4, [rsi] ; 83 82 81 80 - movd xmm1, [rdx] ; c3 c2 c1 c0 - movd xmm6, [rdi] ; 93 92 91 90 - movd xmm3, [rcx] ; d3 d2 d1 d0 - punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 - punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 - - movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0 - movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 - movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0 - movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 - punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 - punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 - - punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 - punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 - - movdqa xmm7, xmm4 - punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - - movdqa xmm6, xmm4 - punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - - punpcklqdq xmm0, xmm4 ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - punpckhqdq xmm1, xmm4 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - - mov rdx, arg(2) ;blimit - - ; calculate mask - movdqa xmm6, xmm0 ; p1 - movdqa xmm7, xmm3 ; q1 - psubusb xmm7, xmm0 ; q1-=p1 - psubusb xmm6, xmm3 ; p1-=q1 - por xmm6, xmm7 ; abs(p1-q1) - pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw xmm6, 1 ; abs(p1-q1)/2 - - movdqa xmm7, [rdx] - - movdqa xmm5, xmm1 ; p0 - movdqa xmm4, xmm2 ; q0 - psubusb xmm5, xmm2 ; p0-=q0 - psubusb xmm4, xmm1 ; q0-=p0 - por xmm5, xmm4 ; abs(p0 - q0) - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - movdqa xmm4, [GLOBAL(t80)] - - psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor xmm7, xmm7 - pcmpeqb xmm5, xmm7 ; mm5 = mask - - ; start work on filters - movdqa t0, xmm0 - movdqa t1, xmm3 - - pxor xmm0, xmm4 ; p1 offset to convert to signed values - pxor xmm3, xmm4 ; q1 offset to convert to signed values - psubsb xmm0, xmm3 ; p1 - q1 - - pxor xmm1, xmm4 ; offset to convert to signed values - pxor xmm2, xmm4 ; offset to convert to signed values - - movdqa xmm3, xmm2 ; offseted ; q0 - psubsb xmm2, xmm1 ; q0 - p0 - paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0) - paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0) - pand xmm5, xmm0 ; mask filter values we don't care about - - movdqa xmm0, xmm5 - paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 - paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 - - movdqa xmm6, [GLOBAL(te0)] - movdqa xmm2, [GLOBAL(t1f)] - -; pxor xmm7, xmm7 - pcmpgtb xmm7, xmm0 ;save sign - pand xmm7, xmm6 ;preserve the upper 3 bits - psrlw xmm0, 3 - pand xmm0, xmm2 ;clear out upper 3 bits - por xmm0, xmm7 ;add sign - psubsb xmm3, xmm0 ; q0-= q0sz add - - pxor xmm7, xmm7 - pcmpgtb xmm7, xmm5 ;save sign - pand xmm7, xmm6 ;preserve the upper 3 bits - psrlw xmm5, 3 - pand xmm5, xmm2 ;clear out upper 3 bits - por xmm5, xmm7 ;add sign - paddsb xmm1, xmm5 ; p0+= p0 add - - pxor xmm3, xmm4 ; unoffset q0 - pxor xmm1, xmm4 ; unoffset p0 - - movdqa xmm0, t0 ; p1 - movdqa xmm4, t1 ; q1 - - ; write out order: xmm0 xmm2 xmm1 xmm3 - lea rdx, [rsi + rax*4] - - ; transpose back to write out - ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - movdqa xmm6, xmm0 - punpcklbw xmm0, xmm1 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm6, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - movdqa xmm5, xmm3 - punpcklbw xmm3, xmm4 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm5, xmm4 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm3 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - punpckhwd xmm2, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - - movdqa xmm3, xmm6 - punpcklwd xmm6, xmm5 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm3, xmm5 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - movd [rsi], xmm6 ; write the second 8-line result - movd [rdx], xmm3 - psrldq xmm6, 4 - psrldq xmm3, 4 - movd [rdi], xmm6 - movd [rcx], xmm3 - psrldq xmm6, 4 - psrldq xmm3, 4 - movd [rsi + rax*2], xmm6 - movd [rdx + rax*2], xmm3 - psrldq xmm6, 4 - psrldq xmm3, 4 - movd [rdi + rax*2], xmm6 - movd [rcx + rax*2], xmm3 - - neg rax - lea rsi, [rsi + rax*8] - neg rax - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd [rsi], xmm0 ; write the first 8-line result - movd [rdx], xmm2 - psrldq xmm0, 4 - psrldq xmm2, 4 - movd [rdi], xmm0 - movd [rcx], xmm2 - psrldq xmm0, 4 - psrldq xmm2, 4 - movd [rsi + rax*2], xmm0 - movd [rdx + rax*2], xmm2 - psrldq xmm0, 4 - psrldq xmm2, 4 - movd [rdi + rax*2], xmm0 - movd [rcx + rax*2], xmm2 - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -tfe: - times 16 db 0xfe -align 16 -t80: - times 16 db 0x80 -align 16 -t1s: - times 16 db 0x01 -align 16 -t3: - times 16 db 0x03 -align 16 -t4: - times 16 db 0x04 -align 16 -ones: - times 8 dw 0x0001 -align 16 -s9: - times 8 dw 0x0900 -align 16 -s63: - times 8 dw 0x003f -align 16 -te0: - times 16 db 0xe0 -align 16 -t1f: - times 16 db 0x1f diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c b/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c deleted file mode 100644 index 6586004600..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8/common/loopfilter.h" - -#define prototype_loopfilter(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ - const unsigned char *limit, const unsigned char *thresh, int count) - -#define prototype_loopfilter_nc(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ - const unsigned char *limit, const unsigned char *thresh) - -#define prototype_simple_loopfilter(sym) \ - void sym(unsigned char *y, int ystride, const unsigned char *blimit) - -prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx); -prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx); -prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx); -prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx); -prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx); -prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx); - -#if HAVE_SSE2 && ARCH_X86_64 -prototype_loopfilter(vp8_loop_filter_bv_y_sse2); -prototype_loopfilter(vp8_loop_filter_bh_y_sse2); -#else -prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2); -prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2); -#endif -prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2); -prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2); - -extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; -extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; -extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2; -extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; - -#if HAVE_MMX -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - - -void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit); -} - - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - - -void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit); -} -#endif - - -/* Horizontal MB filtering */ -#if HAVE_SSE2 -void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); -} - - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); -} - - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ -#if ARCH_X86_64 - vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); -#else - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); -#endif - - if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride); -} - - -void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit); -} - - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ -#if ARCH_X86_64 - vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); -#else - vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); -#endif - - if (u_ptr) - vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4); -} - - -void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit); -} - -#endif diff --git a/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm b/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm deleted file mode 100644 index 15e98713c7..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm +++ /dev/null @@ -1,274 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void copy_mem8x8_mmx( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem8x8_mmx) PRIVATE -sym(vp8_copy_mem8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movq mm0, [rsi] - - movsxd rax, dword ptr arg(1) ;src_stride; - mov rdi, arg(2) ;dst; - - movq mm1, [rsi+rax] - movq mm2, [rsi+rax*2] - - movsxd rcx, dword ptr arg(3) ;dst_stride - lea rsi, [rsi+rax*2] - - movq [rdi], mm0 - add rsi, rax - - movq [rdi+rcx], mm1 - movq [rdi+rcx*2], mm2 - - - lea rdi, [rdi+rcx*2] - movq mm3, [rsi] - - add rdi, rcx - movq mm4, [rsi+rax] - - movq mm5, [rsi+rax*2] - movq [rdi], mm3 - - lea rsi, [rsi+rax*2] - movq [rdi+rcx], mm4 - - movq [rdi+rcx*2], mm5 - lea rdi, [rdi+rcx*2] - - movq mm0, [rsi+rax] - movq mm1, [rsi+rax*2] - - movq [rdi+rcx], mm0 - movq [rdi+rcx*2],mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void copy_mem8x4_mmx( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem8x4_mmx) PRIVATE -sym(vp8_copy_mem8x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movq mm0, [rsi] - - movsxd rax, dword ptr arg(1) ;src_stride; - mov rdi, arg(2) ;dst; - - movq mm1, [rsi+rax] - movq mm2, [rsi+rax*2] - - movsxd rcx, dword ptr arg(3) ;dst_stride - lea rsi, [rsi+rax*2] - - movq [rdi], mm0 - movq [rdi+rcx], mm1 - - movq [rdi+rcx*2], mm2 - lea rdi, [rdi+rcx*2] - - movq mm3, [rsi+rax] - movq [rdi+rcx], mm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void copy_mem16x16_mmx( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem16x16_mmx) PRIVATE -sym(vp8_copy_mem16x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movsxd rax, dword ptr arg(1) ;src_stride; - - mov rdi, arg(2) ;dst; - movsxd rcx, dword ptr arg(3) ;dst_stride - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq [rdi], mm0 - movq [rdi+8], mm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm b/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm deleted file mode 100644 index cb89537f76..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm +++ /dev/null @@ -1,116 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void copy_mem16x16_sse2( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem16x16_sse2) PRIVATE -sym(vp8_copy_mem16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movdqu xmm0, [rsi] - - movsxd rax, dword ptr arg(1) ;src_stride; - mov rdi, arg(2) ;dst; - - movdqu xmm1, [rsi+rax] - movdqu xmm2, [rsi+rax*2] - - movsxd rcx, dword ptr arg(3) ;dst_stride - lea rsi, [rsi+rax*2] - - movdqa [rdi], xmm0 - add rsi, rax - - movdqa [rdi+rcx], xmm1 - movdqa [rdi+rcx*2],xmm2 - - lea rdi, [rdi+rcx*2] - movdqu xmm3, [rsi] - - add rdi, rcx - movdqu xmm4, [rsi+rax] - - movdqu xmm5, [rsi+rax*2] - lea rsi, [rsi+rax*2] - - movdqa [rdi], xmm3 - add rsi, rax - - movdqa [rdi+rcx], xmm4 - movdqa [rdi+rcx*2],xmm5 - - lea rdi, [rdi+rcx*2] - movdqu xmm0, [rsi] - - add rdi, rcx - movdqu xmm1, [rsi+rax] - - movdqu xmm2, [rsi+rax*2] - lea rsi, [rsi+rax*2] - - movdqa [rdi], xmm0 - add rsi, rax - - movdqa [rdi+rcx], xmm1 - - movdqa [rdi+rcx*2], xmm2 - movdqu xmm3, [rsi] - - movdqu xmm4, [rsi+rax] - lea rdi, [rdi+rcx*2] - - add rdi, rcx - movdqu xmm5, [rsi+rax*2] - - lea rsi, [rsi+rax*2] - movdqa [rdi], xmm3 - - add rsi, rax - movdqa [rdi+rcx], xmm4 - - movdqa [rdi+rcx*2],xmm5 - movdqu xmm0, [rsi] - - lea rdi, [rdi+rcx*2] - movdqu xmm1, [rsi+rax] - - add rdi, rcx - movdqu xmm2, [rsi+rax*2] - - lea rsi, [rsi+rax*2] - movdqa [rdi], xmm0 - - movdqa [rdi+rcx], xmm1 - movdqa [rdi+rcx*2],xmm2 - - movdqu xmm3, [rsi+rax] - lea rdi, [rdi+rcx*2] - - movdqa [rdi+rcx], xmm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm deleted file mode 100644 index 47dd452297..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm +++ /dev/null @@ -1,702 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -extern sym(vp8_bilinear_filters_x86_8) - - -%define BLOCK_HEIGHT_WIDTH 4 -%define vp8_filter_weight 128 -%define VP8_FILTER_SHIFT 7 - - -;void vp8_filter_block1d_h6_mmx -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short * vp8_filter -;) -global sym(vp8_filter_block1d_h6_mmx) PRIVATE -sym(vp8_filter_block1d_h6_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(6) ;vp8_filter - - movq mm1, [rdx + 16] ; do both the negative taps first!!! - movq mm2, [rdx + 32] ; - movq mm6, [rdx + 48] ; - movq mm7, [rdx + 64] ; - - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(5) ;output_width ; destination pitch? - pxor mm0, mm0 ; mm0 = 00000000 - -.nextrow: - movq mm3, [rsi-2] ; mm3 = p-2..p5 - movq mm4, mm3 ; mm4 = p-2..p5 - psrlq mm3, 8 ; mm3 = p-1..p5 - punpcklbw mm3, mm0 ; mm3 = p-1..p2 - pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. - - movq mm5, mm4 ; mm5 = p-2..p5 - punpckhbw mm4, mm0 ; mm5 = p2..p5 - pmullw mm4, mm7 ; mm5 *= kernel 4 modifiers - paddsw mm3, mm4 ; mm3 += mm5 - - movq mm4, mm5 ; mm4 = p-2..p5; - psrlq mm5, 16 ; mm5 = p0..p5; - punpcklbw mm5, mm0 ; mm5 = p0..p3 - pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers - paddsw mm3, mm5 ; mm3 += mm5 - - movq mm5, mm4 ; mm5 = p-2..p5 - psrlq mm4, 24 ; mm4 = p1..p5 - punpcklbw mm4, mm0 ; mm4 = p1..p4 - pmullw mm4, mm6 ; mm5 *= kernel 3 modifiers - paddsw mm3, mm4 ; mm3 += mm5 - - ; do outer positive taps - movd mm4, [rsi+3] - punpcklbw mm4, mm0 ; mm5 = p3..p6 - pmullw mm4, [rdx+80] ; mm5 *= kernel 0 modifiers - paddsw mm3, mm4 ; mm3 += mm5 - - punpcklbw mm5, mm0 ; mm5 = p-2..p1 - pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers - paddsw mm3, mm5 ; mm3 += mm5 - - paddsw mm3, [GLOBAL(rd)] ; mm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 - packuswb mm3, mm0 ; pack and unpack to saturate - punpcklbw mm3, mm0 ; - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line - add rdi, rax; -%else - movsxd r8, dword ptr arg(2) ;src_pixels_per_line - add rdi, rax; - - add rsi, r8 ; next line -%endif - - dec rcx ; decrement count - jnz .nextrow ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1dc_v6_mmx -;( -; short *src_ptr, -; unsigned char *output_ptr, -; int output_pitch, -; unsigned int pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short * vp8_filter -;) -global sym(vp8_filter_block1dc_v6_mmx) PRIVATE -sym(vp8_filter_block1dc_v6_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movq mm5, [GLOBAL(rd)] - push rbx - mov rbx, arg(7) ;vp8_filter - movq mm1, [rbx + 16] ; do both the negative taps first!!! - movq mm2, [rbx + 32] ; - movq mm6, [rbx + 48] ; - movq mm7, [rbx + 64] ; - - movsxd rdx, dword ptr arg(3) ;pixels_per_line - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - sub rsi, rdx - sub rsi, rdx - movsxd rcx, DWORD PTR arg(5) ;output_height - movsxd rax, DWORD PTR arg(2) ;output_pitch ; destination pitch? - pxor mm0, mm0 ; mm0 = 00000000 - - -.nextrow_cv: - movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1 - pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. - - - movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2 - pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0 - pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - movq mm4, [rsi] ; mm4 = p0..p3 = row -2 - pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - - add rsi, rdx ; move source forward 1 line to avoid 3 * pitch - movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1 - pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3 - pmullw mm4, [rbx +80] ; mm4 *= kernel 3 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - - paddsw mm3, mm5 ; mm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 - packuswb mm3, mm0 ; pack and saturate - - movd [rdi],mm3 ; store the results in the destination - ; the subsequent iterations repeat 3 out of 4 of these reads. Since the - ; recon block should be in cache this shouldn't cost much. Its obviously - ; avoidable!!!. - lea rdi, [rdi+rax] ; - dec rcx ; decrement count - jnz .nextrow_cv ; next row - - pop rbx - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict8x8_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_mmx) PRIVATE -sym(vp8_bilinear_predict8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - shl rax, 5 ; offset * 32 - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - - add rax, rcx ; HFilter - mov rsi, arg(0) ;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - - shl rax, 5 ; offset*32 - add rax, rcx ; VFilter - - lea rcx, [rdi+rdx*8] ; - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x8: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, [rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 ;dst_pitch -%endif - cmp rdi, rcx ; - jne .next_row_8x8 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict8x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x4_mmx) PRIVATE -sym(vp8_bilinear_predict8x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - shl rax, 5 - - mov rsi, arg(0) ;src_ptr ; - add rax, rcx - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x4: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, [rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 -%endif - cmp rdi, rcx ; - jne .next_row_8x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict4x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict4x4_mmx) PRIVATE -sym(vp8_bilinear_predict4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - shl rax, 5 - - add rax, rcx ; HFilter - mov rsi, arg(0) ;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;ldst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movq mm7, mm3 ; - packuswb mm7, mm0 ; - - add rsi, rdx ; next line -.next_row_4x4: - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - - movq mm5, mm7 ; - punpcklbw mm5, mm0 ; - - pmullw mm5, [rax] ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - movq mm7, mm3 ; - - packuswb mm7, mm0 ; - - pmullw mm3, [rax+16] ; - paddw mm3, mm5 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - packuswb mm3, mm0 - movd [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch ; - add rsi, rdx ; next line - add rdi, r8 -%endif - - cmp rdi, rcx ; - jne .next_row_4x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - -SECTION_RODATA -align 16 -rd: - times 4 dw 0x40 - -align 16 -global HIDDEN_DATA(sym(vp8_six_tap_mmx)) -sym(vp8_six_tap_mmx): - times 8 dw 0 - times 8 dw 0 - times 8 dw 128 - times 8 dw 0 - times 8 dw 0 - times 8 dw 0 - - times 8 dw 0 - times 8 dw -6 - times 8 dw 123 - times 8 dw 12 - times 8 dw -1 - times 8 dw 0 - - times 8 dw 2 - times 8 dw -11 - times 8 dw 108 - times 8 dw 36 - times 8 dw -8 - times 8 dw 1 - - times 8 dw 0 - times 8 dw -9 - times 8 dw 93 - times 8 dw 50 - times 8 dw -6 - times 8 dw 0 - - times 8 dw 3 - times 8 dw -16 - times 8 dw 77 - times 8 dw 77 - times 8 dw -16 - times 8 dw 3 - - times 8 dw 0 - times 8 dw -6 - times 8 dw 50 - times 8 dw 93 - times 8 dw -9 - times 8 dw 0 - - times 8 dw 1 - times 8 dw -8 - times 8 dw 36 - times 8 dw 108 - times 8 dw -11 - times 8 dw 2 - - times 8 dw 0 - times 8 dw -1 - times 8 dw 12 - times 8 dw 123 - times 8 dw -6 - times 8 dw 0 - - diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm deleted file mode 100644 index 69f8d103c1..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm +++ /dev/null @@ -1,1372 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -extern sym(vp8_bilinear_filters_x86_8) - -%define BLOCK_HEIGHT_WIDTH 4 -%define VP8_FILTER_WEIGHT 128 -%define VP8_FILTER_SHIFT 7 - - -;/************************************************************************************ -; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The -; input pixel array has output_height rows. This routine assumes that output_height is an -; even number. This function handles 8 pixels in horizontal direction, calculating ONE -; rows each iteration to take advantage of the 128 bits operations. -;*************************************************************************************/ -;void vp8_filter_block1d8_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short *vp8_filter -;) -global sym(vp8_filter_block1d8_h6_sse2) PRIVATE -sym(vp8_filter_block1d8_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(6) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;output_width -%endif - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d8_h6_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - punpcklbw xmm4, xmm0 - - movdqa XMMWORD Ptr [rdi], xmm4 - lea rsi, [rsi + rax] - -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(5) ;[output_width] -%else - add rdi, r8 -%endif - dec rcx - - jnz .filter_block1d8_h6_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d16_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short *vp8_filter -;) -;/************************************************************************************ -; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The -; input pixel array has output_height rows. This routine assumes that output_height is an -; even number. This function handles 8 pixels in horizontal direction, calculating ONE -; rows each iteration to take advantage of the 128 bits operations. -;*************************************************************************************/ -global sym(vp8_filter_block1d16_h6_sse2) PRIVATE -sym(vp8_filter_block1d16_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(6) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;output_width -%endif - - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d16_h6_sse2_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - movq xmm2, MMWORD PTR [rsi +14] - pslldq xmm2, 8 - - por xmm2, xmm1 - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - punpcklbw xmm4, xmm0 - - movdqa XMMWORD Ptr [rdi], xmm4 - - movdqa xmm3, xmm2 - movdqa xmm4, xmm2 - - movdqa xmm5, xmm2 - movdqa xmm6, xmm2 - - movdqa xmm7, xmm2 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm2 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - punpcklbw xmm4, xmm0 - - movdqa XMMWORD Ptr [rdi+16], xmm4 - - lea rsi, [rsi + rax] -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(5) ;[output_width] -%else - add rdi, r8 -%endif - - dec rcx - jnz .filter_block1d16_h6_sse2_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d8_v6_sse2 -;( -; short *src_ptr, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short * vp8_filter -;) -;/************************************************************************************ -; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The -; input pixel array has output_height rows. -;*************************************************************************************/ -global sym(vp8_filter_block1d8_v6_sse2) PRIVATE -sym(vp8_filter_block1d8_v6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(7) ;vp8_filter - movsxd rdx, dword ptr arg(3) ;pixels_per_line - - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - - sub rsi, rdx - sub rsi, rdx - - movsxd rcx, DWORD PTR arg(5) ;[output_height] - pxor xmm0, xmm0 ; clear xmm0 - - movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(2) ; dst_ptich -%endif - -.vp8_filter_block1d8_v6_sse2_loop: - movdqa xmm1, XMMWORD PTR [rsi] - pmullw xmm1, [rax] - - movdqa xmm2, XMMWORD PTR [rsi + rdx] - pmullw xmm2, [rax + 16] - - movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] - pmullw xmm3, [rax + 32] - - movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] - pmullw xmm5, [rax + 64] - - add rsi, rdx - movdqa xmm4, XMMWORD PTR [rsi + rdx * 2] - - pmullw xmm4, [rax + 48] - movdqa xmm6, XMMWORD PTR [rsi + rdx * 4] - - pmullw xmm6, [rax + 80] - - paddsw xmm2, xmm5 - paddsw xmm2, xmm3 - - paddsw xmm2, xmm1 - paddsw xmm2, xmm4 - - paddsw xmm2, xmm6 - paddsw xmm2, xmm7 - - psraw xmm2, 7 - packuswb xmm2, xmm0 ; pack and saturate - - movq QWORD PTR [rdi], xmm2 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(2) ;[dst_ptich] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz .vp8_filter_block1d8_v6_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d16_v6_sse2 -;( -; unsigned short *src_ptr, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; const short *vp8_filter -;) -;/************************************************************************************ -; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The -; input pixel array has output_height rows. -;*************************************************************************************/ -global sym(vp8_filter_block1d16_v6_sse2) PRIVATE -sym(vp8_filter_block1d16_v6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(7) ;vp8_filter - movsxd rdx, dword ptr arg(3) ;pixels_per_line - - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - - sub rsi, rdx - sub rsi, rdx - - movsxd rcx, DWORD PTR arg(5) ;[output_height] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(2) ; dst_ptich -%endif - -.vp8_filter_block1d16_v6_sse2_loop: -; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order. - movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2 - movdqa xmm2, XMMWORD PTR [rsi + rdx + 16] - pmullw xmm1, [rax + 16] - pmullw xmm2, [rax + 16] - - movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5 - movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16] - pmullw xmm3, [rax + 64] - pmullw xmm4, [rax + 64] - - movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3 - movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16] - pmullw xmm5, [rax + 32] - pmullw xmm6, [rax + 32] - - movdqa xmm7, XMMWORD PTR [rsi] ; line 1 - movdqa xmm0, XMMWORD PTR [rsi + 16] - pmullw xmm7, [rax] - pmullw xmm0, [rax] - - paddsw xmm1, xmm3 - paddsw xmm2, xmm4 - paddsw xmm1, xmm5 - paddsw xmm2, xmm6 - paddsw xmm1, xmm7 - paddsw xmm2, xmm0 - - add rsi, rdx - - movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4 - movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16] - pmullw xmm3, [rax + 48] - pmullw xmm4, [rax + 48] - - movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6 - movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16] - pmullw xmm5, [rax + 80] - pmullw xmm6, [rax + 80] - - movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] - pxor xmm0, xmm0 ; clear xmm0 - - paddsw xmm1, xmm3 - paddsw xmm2, xmm4 - paddsw xmm1, xmm5 - paddsw xmm2, xmm6 - - paddsw xmm1, xmm7 - paddsw xmm2, xmm7 - - psraw xmm1, 7 - psraw xmm2, 7 - - packuswb xmm1, xmm2 ; pack and saturate - movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(2) ;[dst_ptich] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz .vp8_filter_block1d16_v6_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d8_h6_only_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int output_height, -; const short *vp8_filter -;) -; First-pass filter only when yoffset==0 -global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE -sym(vp8_filter_block1d8_h6_only_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(5) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(2) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(3) ;dst_ptich -%endif - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d8_h6_only_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - - movq QWORD PTR [rdi], xmm4 ; store the results in the destination - lea rsi, [rsi + rax] - -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(3) ;dst_ptich -%else - add rdi, r8 -%endif - dec rcx - - jnz .filter_block1d8_h6_only_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d16_h6_only_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int output_height, -; const short *vp8_filter -;) -; First-pass filter only when yoffset==0 -global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE -sym(vp8_filter_block1d16_h6_only_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(5) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(2) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(3) ;dst_ptich -%endif - - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d16_h6_only_sse2_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - movq xmm2, MMWORD PTR [rsi +14] - pslldq xmm2, 8 - - por xmm2, xmm1 - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 ; lower 8 bytes - - movq QWORD Ptr [rdi], xmm4 ; store the results in the destination - - movdqa xmm3, xmm2 - movdqa xmm4, xmm2 - - movdqa xmm5, xmm2 - movdqa xmm6, xmm2 - - movdqa xmm7, xmm2 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm2 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 ; higher 8 bytes - - movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination - - lea rsi, [rsi + rax] -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(3) ;dst_ptich -%else - add rdi, r8 -%endif - - dec rcx - jnz .filter_block1d16_h6_only_sse2_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d8_v6_only_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int output_height, -; const short *vp8_filter -;) -; Second-pass filter only when xoffset==0 -global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE -sym(vp8_filter_block1d8_v6_only_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - mov rax, arg(5) ;vp8_filter - - pxor xmm0, xmm0 ; clear xmm0 - - movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(3) ; dst_ptich -%endif - -.vp8_filter_block1d8_v6_only_sse2_loop: - movq xmm1, MMWORD PTR [rsi] - movq xmm2, MMWORD PTR [rsi + rdx] - movq xmm3, MMWORD PTR [rsi + rdx * 2] - movq xmm5, MMWORD PTR [rsi + rdx * 4] - add rsi, rdx - movq xmm4, MMWORD PTR [rsi + rdx * 2] - movq xmm6, MMWORD PTR [rsi + rdx * 4] - - punpcklbw xmm1, xmm0 - pmullw xmm1, [rax] - - punpcklbw xmm2, xmm0 - pmullw xmm2, [rax + 16] - - punpcklbw xmm3, xmm0 - pmullw xmm3, [rax + 32] - - punpcklbw xmm5, xmm0 - pmullw xmm5, [rax + 64] - - punpcklbw xmm4, xmm0 - pmullw xmm4, [rax + 48] - - punpcklbw xmm6, xmm0 - pmullw xmm6, [rax + 80] - - paddsw xmm2, xmm5 - paddsw xmm2, xmm3 - - paddsw xmm2, xmm1 - paddsw xmm2, xmm4 - - paddsw xmm2, xmm6 - paddsw xmm2, xmm7 - - psraw xmm2, 7 - packuswb xmm2, xmm0 ; pack and saturate - - movq QWORD PTR [rdi], xmm2 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[dst_ptich] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz .vp8_filter_block1d8_v6_only_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_unpack_block1d16_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int output_height, -; unsigned int output_width -;) -global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE -sym(vp8_unpack_block1d16_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(3) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source - - pxor xmm0, xmm0 ; clear xmm0 for unpack -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source -%endif - -.unpack_block1d16_h6_sse2_rowloop: - movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2 - movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - punpcklbw xmm1, xmm0 - - movdqa XMMWORD Ptr [rdi], xmm1 - movdqa XMMWORD Ptr [rdi + 16], xmm3 - - lea rsi, [rsi + rax] -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(4) ;[output_width] -%else - add rdi, r8 -%endif - dec rcx - jnz .unpack_block1d16_h6_sse2_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_bilinear_predict16x16_sse2 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -extern sym(vp8_bilinear_filters_x86_8) -global sym(vp8_bilinear_predict16x16_sse2) PRIVATE -sym(vp8_bilinear_predict16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - movsxd rax, dword ptr arg(2) ;xoffset - - cmp rax, 0 ;skip first_pass filter if xoffset=0 - je .b16x16_sp_only - - shl rax, 5 - add rax, rcx ;HFilter - - mov rdi, arg(4) ;dst_ptr - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - movsxd rax, dword ptr arg(3) ;yoffset - - cmp rax, 0 ;skip second_pass filter if yoffset=0 - je .b16x16_fp_only - - shl rax, 5 - add rax, rcx ;VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - pxor xmm0, xmm0 - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;dst_pitch -%endif - ; get the first horizontal line done - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 - - add rsi, rdx ; next line -.next_row: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - movdqa xmm5, xmm7 - movdqa xmm6, xmm7 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, [rax] - pmullw xmm6, [rax] - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 - - pmullw xmm3, [rax+16] - pmullw xmm4, [rax+16] - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rdx ; next line -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(5) ;dst_pitch -%else - add rdi, r8 -%endif - - cmp rdi, rcx - jne .next_row - - jmp .done - -.b16x16_sp_only: - movsxd rax, dword ptr arg(3) ;yoffset - shl rax, 5 - add rax, rcx ;VFilter - - mov rdi, arg(4) ;dst_ptr - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - - pxor xmm0, xmm0 - - ; get the first horizontal line done - movdqu xmm7, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - - add rsi, rax ; next line -.next_row_spo: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - - movdqa xmm5, xmm7 - movdqa xmm6, xmm7 - - movdqa xmm4, xmm3 ; make a copy of current line - movdqa xmm7, xmm3 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm5, xmm1 - pmullw xmm6, xmm1 - pmullw xmm3, xmm2 - pmullw xmm4, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rax ; next line - add rdi, rdx ;dst_pitch - cmp rdi, rcx - jne .next_row_spo - - jmp .done - -.b16x16_fp_only: - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - pxor xmm0, xmm0 - -.next_row_fpo: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rax ; next line - add rdi, rdx ; dst_pitch - cmp rdi, rcx - jne .next_row_fpo - -.done: - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_bilinear_predict8x8_sse2 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_sse2) PRIVATE -sym(vp8_bilinear_predict8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 144 ; reserve 144 bytes - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - ;Read 9-line unaligned data in and put them on stack. This gives a big - ;performance boost. - movdqu xmm0, [rsi] - lea rax, [rdx + rdx*2] - movdqu xmm1, [rsi+rdx] - movdqu xmm2, [rsi+rdx*2] - add rsi, rax - movdqu xmm3, [rsi] - movdqu xmm4, [rsi+rdx] - movdqu xmm5, [rsi+rdx*2] - add rsi, rax - movdqu xmm6, [rsi] - movdqu xmm7, [rsi+rdx] - - movdqa XMMWORD PTR [rsp], xmm0 - - movdqu xmm0, [rsi+rdx*2] - - movdqa XMMWORD PTR [rsp+16], xmm1 - movdqa XMMWORD PTR [rsp+32], xmm2 - movdqa XMMWORD PTR [rsp+48], xmm3 - movdqa XMMWORD PTR [rsp+64], xmm4 - movdqa XMMWORD PTR [rsp+80], xmm5 - movdqa XMMWORD PTR [rsp+96], xmm6 - movdqa XMMWORD PTR [rsp+112], xmm7 - movdqa XMMWORD PTR [rsp+128], xmm0 - - movsxd rax, dword ptr arg(2) ;xoffset - shl rax, 5 - add rax, rcx ;HFilter - - mov rdi, arg(4) ;dst_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - movsxd rax, dword ptr arg(3) ;yoffset - shl rax, 5 - add rax, rcx ;VFilter - - lea rcx, [rdi+rdx*8] - - movdqa xmm5, [rax] - movdqa xmm6, [rax+16] - - pxor xmm0, xmm0 - - ; get the first horizontal line done - movdqa xmm3, XMMWORD PTR [rsp] - movdqa xmm4, xmm3 ; make a copy of current line - psrldq xmm4, 1 - - punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 - punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm2 - - paddw xmm3, xmm4 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm7, xmm3 - add rsp, 16 ; next line -.next_row8x8: - movdqa xmm3, XMMWORD PTR [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - movdqa xmm4, xmm3 ; make a copy of current line - psrldq xmm4, 1 - - punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 - punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm2 - - paddw xmm3, xmm4 - pmullw xmm7, xmm5 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm4, xmm3 - - pmullw xmm3, xmm6 - paddw xmm3, xmm7 - - movdqa xmm7, xmm4 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - packuswb xmm3, xmm0 - movq [rdi], xmm3 ; store the results in the destination - - add rsp, 16 ; next line - add rdi, rdx - - cmp rdi, rcx - jne .next_row8x8 - - ;add rsp, 144 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -SECTION_RODATA -align 16 -rd: - times 8 dw 0x40 diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm deleted file mode 100644 index c06f24556e..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm +++ /dev/null @@ -1,1508 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%define BLOCK_HEIGHT_WIDTH 4 -%define VP8_FILTER_WEIGHT 128 -%define VP8_FILTER_SHIFT 7 - - -;/************************************************************************************ -; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The -; input pixel array has output_height rows. This routine assumes that output_height is an -; even number. This function handles 8 pixels in horizontal direction, calculating ONE -; rows each iteration to take advantage of the 128 bits operations. -; -; This is an implementation of some of the SSE optimizations first seen in ffvp8 -; -;*************************************************************************************/ -;void vp8_filter_block1d8_h6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE -sym(vp8_filter_block1d8_h6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 - - movdqa xmm7, [GLOBAL(rd)] - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - mov rdi, arg(2) ;output_ptr - - cmp esi, DWORD PTR [rax] - je vp8_filter_block1d8_h4_ssse3 - - movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - - sub rdi, rdx -;xmm3 free -.filter_block1d8_h6_rowloop_ssse3: - movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 - - movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 - - punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 - - movdqa xmm1, xmm0 - pmaddubsw xmm0, xmm4 - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2bfrom1)] - - pshufb xmm2, [GLOBAL(shuf3bfrom1)] - pmaddubsw xmm1, xmm5 - - lea rdi, [rdi + rdx] - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] - dec rcx - - paddsw xmm0, xmm1 - paddsw xmm2, xmm7 - - paddsw xmm0, xmm2 - - psraw xmm0, 7 - - packuswb xmm0, xmm0 - - movq MMWORD Ptr [rdi], xmm0 - jnz .filter_block1d8_h6_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -vp8_filter_block1d8_h4_ssse3: - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] - movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] - - mov rsi, arg(0) ;src_ptr - - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - - sub rdi, rdx - -.filter_block1d8_h4_rowloop_ssse3: - movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 - - movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 - - punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 - - movdqa xmm2, xmm0 - pshufb xmm0, xmm3 - - pshufb xmm2, xmm4 - pmaddubsw xmm0, xmm5 - - lea rdi, [rdi + rdx] - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] - dec rcx - - paddsw xmm0, xmm7 - - paddsw xmm0, xmm2 - - psraw xmm0, 7 - - packuswb xmm0, xmm0 - - movq MMWORD Ptr [rdi], xmm0 - - jnz .filter_block1d8_h4_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -;void vp8_filter_block1d16_h6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE -sym(vp8_filter_block1d16_h6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - mov rdi, arg(2) ;output_ptr - - mov rsi, arg(0) ;src_ptr - - movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rdx, dword ptr arg(3) ;output_pitch - -.filter_block1d16_h6_rowloop_ssse3: - movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 - - movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 - - punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 - - movdqa xmm1, xmm0 - pmaddubsw xmm0, xmm4 - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2bfrom1)] - - pshufb xmm2, [GLOBAL(shuf3bfrom1)] - movq xmm3, MMWORD PTR [rsi + 6] - - pmaddubsw xmm1, xmm5 - movq xmm7, MMWORD PTR [rsi + 11] - - pmaddubsw xmm2, xmm6 - punpcklbw xmm3, xmm7 - - paddsw xmm0, xmm1 - movdqa xmm1, xmm3 - - pmaddubsw xmm3, xmm4 - paddsw xmm0, xmm2 - - movdqa xmm2, xmm1 - paddsw xmm0, [GLOBAL(rd)] - - pshufb xmm1, [GLOBAL(shuf2bfrom1)] - pshufb xmm2, [GLOBAL(shuf3bfrom1)] - - psraw xmm0, 7 - pmaddubsw xmm1, xmm5 - - pmaddubsw xmm2, xmm6 - packuswb xmm0, xmm0 - - lea rsi, [rsi + rax] - paddsw xmm3, xmm1 - - paddsw xmm3, xmm2 - - paddsw xmm3, [GLOBAL(rd)] - - psraw xmm3, 7 - - packuswb xmm3, xmm3 - - punpcklqdq xmm0, xmm3 - - movdqa XMMWORD Ptr [rdi], xmm0 - - lea rdi, [rdi + rdx] - dec rcx - jnz .filter_block1d16_h6_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_filter_block1d4_h6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE -sym(vp8_filter_block1d4_h6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - movdqa xmm7, [GLOBAL(rd)] - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d4_h4_ssse3 - - movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - -;xmm3 free -.filter_block1d4_h6_rowloop_ssse3: - movdqu xmm0, XMMWORD PTR [rsi - 2] - - movdqa xmm1, xmm0 - pshufb xmm0, [GLOBAL(shuf1b)] - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2b)] - pmaddubsw xmm0, xmm4 - pshufb xmm2, [GLOBAL(shuf3b)] - pmaddubsw xmm1, xmm5 - -;-- - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] -;-- - paddsw xmm0, xmm1 - paddsw xmm0, xmm7 - pxor xmm1, xmm1 - paddsw xmm0, xmm2 - psraw xmm0, 7 - packuswb xmm0, xmm0 - - movd DWORD PTR [rdi], xmm0 - - add rdi, rdx - dec rcx - jnz .filter_block1d4_h6_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d4_h4_ssse3: - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)] - movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)] - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - -.filter_block1d4_h4_rowloop_ssse3: - movdqu xmm1, XMMWORD PTR [rsi - 2] - - movdqa xmm2, xmm1 - pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)] - pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)] - pmaddubsw xmm1, xmm5 - -;-- - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] -;-- - paddsw xmm1, xmm7 - paddsw xmm1, xmm2 - psraw xmm1, 7 - packuswb xmm1, xmm1 - - movd DWORD PTR [rdi], xmm1 - - add rdi, rdx - dec rcx - jnz .filter_block1d4_h4_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - - -;void vp8_filter_block1d16_v6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE -sym(vp8_filter_block1d16_v6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d16_v4_ssse3 - - movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch -%endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - - -.vp8_filter_block1d16_v6_ssse3_loop: - movq xmm1, MMWORD PTR [rsi] ;A - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - movq xmm0, MMWORD PTR [rax + rdx * 4] ;F - - pmaddubsw xmm3, xmm6 - punpcklbw xmm1, xmm0 ;A F - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm5 - - paddsw xmm2, xmm3 - paddsw xmm2, xmm1 - paddsw xmm2, [GLOBAL(rd)] - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi], xmm2 ;store the results - - movq xmm1, MMWORD PTR [rsi + 8] ;A - movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F - pmaddubsw xmm3, xmm6 - punpcklbw xmm1, xmm0 ;A F - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm5 - - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm2, xmm3 - paddsw xmm2, xmm1 - paddsw xmm2, [GLOBAL(rd)] - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi+8], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d16_v6_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d16_v4_ssse3: - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch -%endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - -.vp8_filter_block1d16_v4_ssse3_loop: - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - pmaddubsw xmm3, xmm6 - pmaddubsw xmm2, xmm7 - movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B - movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E - - paddsw xmm2, [GLOBAL(rd)] - paddsw xmm2, xmm3 - psraw xmm2, 7 - packuswb xmm2, xmm2 - - punpcklbw xmm5, xmm4 ;B D - punpcklbw xmm1, xmm0 ;C E - - pmaddubsw xmm1, xmm6 - pmaddubsw xmm5, xmm7 - - movdqa xmm4, [GLOBAL(rd)] - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm5, xmm1 - paddsw xmm5, xmm4 - psraw xmm5, 7 - packuswb xmm5, xmm5 - - punpcklqdq xmm2, xmm5 - - movdqa XMMWORD PTR [rdi], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d16_v4_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_filter_block1d8_v6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE -sym(vp8_filter_block1d8_v6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ; out_pitch -%endif - movsxd rcx, DWORD PTR arg(4) ;[output_height] - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d8_v4_ssse3 - - movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d8_v6_ssse3_loop: - movq xmm1, MMWORD PTR [rsi] ;A - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - movq xmm0, MMWORD PTR [rax + rdx * 4] ;F - movdqa xmm4, [GLOBAL(rd)] - - pmaddubsw xmm3, xmm6 - punpcklbw xmm1, xmm0 ;A F - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm5 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm2, xmm3 - paddsw xmm2, xmm1 - paddsw xmm2, xmm4 - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d8_v6_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d8_v4_ssse3: - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm5, [GLOBAL(rd)] - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d8_v4_ssse3_loop: - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - pmaddubsw xmm3, xmm6 - pmaddubsw xmm2, xmm7 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm2, xmm3 - paddsw xmm2, xmm5 - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d8_v4_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -;void vp8_filter_block1d4_v6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE -sym(vp8_filter_block1d4_v6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ; out_pitch -%endif - movsxd rcx, DWORD PTR arg(4) ;[output_height] - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d4_v4_ssse3 - - movq mm5, MMWORD PTR [rax] ;k0_k5 - movq mm6, MMWORD PTR [rax+256] ;k2_k4 - movq mm7, MMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d4_v6_ssse3_loop: - movd mm1, DWORD PTR [rsi] ;A - movd mm2, DWORD PTR [rsi + rdx] ;B - movd mm3, DWORD PTR [rsi + rdx * 2] ;C - movd mm4, DWORD PTR [rax + rdx * 2] ;D - movd mm0, DWORD PTR [rsi + rdx * 4] ;E - - punpcklbw mm2, mm4 ;B D - punpcklbw mm3, mm0 ;C E - - movd mm0, DWORD PTR [rax + rdx * 4] ;F - - movq mm4, [GLOBAL(rd)] - - pmaddubsw mm3, mm6 - punpcklbw mm1, mm0 ;A F - pmaddubsw mm2, mm7 - pmaddubsw mm1, mm5 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw mm2, mm3 - paddsw mm2, mm1 - paddsw mm2, mm4 - psraw mm2, 7 - packuswb mm2, mm2 - - movd DWORD PTR [rdi], mm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d4_v6_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d4_v4_ssse3: - movq mm6, MMWORD PTR [rax+256] ;k2_k4 - movq mm7, MMWORD PTR [rax+128] ;k1_k3 - movq mm5, MMWORD PTR [GLOBAL(rd)] - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d4_v4_ssse3_loop: - movd mm2, DWORD PTR [rsi + rdx] ;B - movd mm3, DWORD PTR [rsi + rdx * 2] ;C - movd mm4, DWORD PTR [rax + rdx * 2] ;D - movd mm0, DWORD PTR [rsi + rdx * 4] ;E - - punpcklbw mm2, mm4 ;B D - punpcklbw mm3, mm0 ;C E - - pmaddubsw mm3, mm6 - pmaddubsw mm2, mm7 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw mm2, mm3 - paddsw mm2, mm5 - psraw mm2, 7 - packuswb mm2, mm2 - - movd DWORD PTR [rdi], mm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d4_v4_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_bilinear_predict16x16_ssse3 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE -sym(vp8_bilinear_predict16x16_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] - movsxd rax, dword ptr arg(2) ; xoffset - - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je .b16x16_sp_only - - shl rax, 4 - lea rax, [rax + rcx] ; HFilter - - mov rdi, arg(4) ; dst_ptr - mov rsi, arg(0) ; src_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm1, [rax] - - movsxd rax, dword ptr arg(3) ; yoffset - - cmp rax, 0 ; skip second_pass filter if yoffset=0 - je .b16x16_fp_only - - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rdx, dword ptr arg(1) ; src_pixels_per_line - - movdqa xmm2, [rax] - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ; dst_pitch -%endif - movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07 - movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 - - punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 - movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 - - movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 - - lea rsi, [rsi + rdx] ; next line - - pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14 - - punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 - pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value - psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - -.next_row: - movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07 - movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 - - punpcklbw xmm6, xmm5 - movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 - - movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 - lea rsi, [rsi + rdx] ; next line - - pmaddubsw xmm6, xmm1 - - punpcklbw xmm4, xmm5 - pmaddubsw xmm4, xmm1 - - paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value - psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 - - paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value - psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 - - packuswb xmm6, xmm4 - movdqa xmm5, xmm7 - - punpcklbw xmm5, xmm6 - pmaddubsw xmm5, xmm2 - - punpckhbw xmm7, xmm6 - pmaddubsw xmm7, xmm2 - - paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value - psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 - - paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value - psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 - - packuswb xmm5, xmm7 - movdqa xmm7, xmm6 - - movdqa [rdi], xmm5 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(5) ; dst_pitch -%else - add rdi, r8 -%endif - - cmp rdi, rcx - jne .next_row - - jmp .done - -.b16x16_sp_only: - movsxd rax, dword ptr arg(3) ; yoffset - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - mov rdi, arg(4) ; dst_ptr - mov rsi, arg(0) ; src_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm1, [rax] ; VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ; src_pixels_per_line - - ; get the first horizontal line done - movq xmm4, [rsi] ; load row 0 - movq xmm2, [rsi + 8] ; load row 0 - - lea rsi, [rsi + rax] ; next line -.next_row_sp: - movq xmm3, [rsi] ; load row + 1 - movq xmm5, [rsi + 8] ; load row + 1 - - punpcklbw xmm4, xmm3 - punpcklbw xmm2, xmm5 - - pmaddubsw xmm4, xmm1 - movq xmm7, [rsi + rax] ; load row + 2 - - pmaddubsw xmm2, xmm1 - movq xmm6, [rsi + rax + 8] ; load row + 2 - - punpcklbw xmm3, xmm7 - punpcklbw xmm5, xmm6 - - pmaddubsw xmm3, xmm1 - paddw xmm4, [GLOBAL(rd)] - - pmaddubsw xmm5, xmm1 - paddw xmm2, [GLOBAL(rd)] - - psraw xmm4, VP8_FILTER_SHIFT - psraw xmm2, VP8_FILTER_SHIFT - - packuswb xmm4, xmm2 - paddw xmm3, [GLOBAL(rd)] - - movdqa [rdi], xmm4 ; store row 0 - paddw xmm5, [GLOBAL(rd)] - - psraw xmm3, VP8_FILTER_SHIFT - psraw xmm5, VP8_FILTER_SHIFT - - packuswb xmm3, xmm5 - movdqa xmm4, xmm7 - - movdqa [rdi + rdx],xmm3 ; store row 1 - lea rsi, [rsi + 2*rax] - - movdqa xmm2, xmm6 - lea rdi, [rdi + 2*rdx] - - cmp rdi, rcx - jne .next_row_sp - - jmp .done - -.b16x16_fp_only: - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ; src_pixels_per_line - -.next_row_fp: - movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07 - movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08 - - punpcklbw xmm2, xmm4 - movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15 - - pmaddubsw xmm2, xmm1 - movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16 - - lea rsi, [rsi + rax] ; next line - punpcklbw xmm3, xmm4 - - pmaddubsw xmm3, xmm1 - movq xmm5, [rsi] - - paddw xmm2, [GLOBAL(rd)] - movq xmm7, [rsi+1] - - movq xmm6, [rsi+8] - psraw xmm2, VP8_FILTER_SHIFT - - punpcklbw xmm5, xmm7 - movq xmm7, [rsi+9] - - paddw xmm3, [GLOBAL(rd)] - pmaddubsw xmm5, xmm1 - - psraw xmm3, VP8_FILTER_SHIFT - punpcklbw xmm6, xmm7 - - packuswb xmm2, xmm3 - pmaddubsw xmm6, xmm1 - - movdqa [rdi], xmm2 ; store the results in the destination - paddw xmm5, [GLOBAL(rd)] - - lea rdi, [rdi + rdx] ; dst_pitch - psraw xmm5, VP8_FILTER_SHIFT - - paddw xmm6, [GLOBAL(rd)] - psraw xmm6, VP8_FILTER_SHIFT - - packuswb xmm5, xmm6 - lea rsi, [rsi + rax] ; next line - - movdqa [rdi], xmm5 ; store the results in the destination - lea rdi, [rdi + rdx] ; dst_pitch - - cmp rdi, rcx - - jne .next_row_fp - -.done: - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_bilinear_predict8x8_ssse3 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE -sym(vp8_bilinear_predict8x8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 144 ; reserve 144 bytes - - lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] - - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - ;Read 9-line unaligned data in and put them on stack. This gives a big - ;performance boost. - movdqu xmm0, [rsi] - lea rax, [rdx + rdx*2] - movdqu xmm1, [rsi+rdx] - movdqu xmm2, [rsi+rdx*2] - add rsi, rax - movdqu xmm3, [rsi] - movdqu xmm4, [rsi+rdx] - movdqu xmm5, [rsi+rdx*2] - add rsi, rax - movdqu xmm6, [rsi] - movdqu xmm7, [rsi+rdx] - - movdqa XMMWORD PTR [rsp], xmm0 - - movdqu xmm0, [rsi+rdx*2] - - movdqa XMMWORD PTR [rsp+16], xmm1 - movdqa XMMWORD PTR [rsp+32], xmm2 - movdqa XMMWORD PTR [rsp+48], xmm3 - movdqa XMMWORD PTR [rsp+64], xmm4 - movdqa XMMWORD PTR [rsp+80], xmm5 - movdqa XMMWORD PTR [rsp+96], xmm6 - movdqa XMMWORD PTR [rsp+112], xmm7 - movdqa XMMWORD PTR [rsp+128], xmm0 - - movsxd rax, dword ptr arg(2) ; xoffset - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je .b8x8_sp_only - - shl rax, 4 - add rax, rcx ; HFilter - - mov rdi, arg(4) ; dst_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm0, [rax] - - movsxd rax, dword ptr arg(3) ; yoffset - cmp rax, 0 ; skip second_pass filter if yoffset=0 - je .b8x8_fp_only - - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - lea rcx, [rdi+rdx*8] - - movdqa xmm1, [rax] - - ; get the first horizontal line done - movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx - - psrldq xmm5, 1 - lea rsp, [rsp + 16] ; next line - - punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 - pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm7, xmm3 - packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - -.next_row: - movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - lea rsp, [rsp + 16] ; next line - - movdqa xmm5, xmm6 - - psrldq xmm5, 1 - - punpcklbw xmm6, xmm5 - pmaddubsw xmm6, xmm0 - - paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value - psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 - - packuswb xmm6, xmm6 - - punpcklbw xmm7, xmm6 - pmaddubsw xmm7, xmm1 - - paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value - psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 - - packuswb xmm7, xmm7 - - movq [rdi], xmm7 ; store the results in the destination - lea rdi, [rdi + rdx] - - movdqa xmm7, xmm6 - - cmp rdi, rcx - jne .next_row - - jmp .done8x8 - -.b8x8_sp_only: - movsxd rax, dword ptr arg(3) ; yoffset - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - mov rdi, arg(4) ;dst_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm0, [rax] ; VFilter - - movq xmm1, XMMWORD PTR [rsp] - movq xmm2, XMMWORD PTR [rsp+16] - - movq xmm3, XMMWORD PTR [rsp+32] - punpcklbw xmm1, xmm2 - - movq xmm4, XMMWORD PTR [rsp+48] - punpcklbw xmm2, xmm3 - - movq xmm5, XMMWORD PTR [rsp+64] - punpcklbw xmm3, xmm4 - - movq xmm6, XMMWORD PTR [rsp+80] - punpcklbw xmm4, xmm5 - - movq xmm7, XMMWORD PTR [rsp+96] - punpcklbw xmm5, xmm6 - - pmaddubsw xmm1, xmm0 - pmaddubsw xmm2, xmm0 - - pmaddubsw xmm3, xmm0 - pmaddubsw xmm4, xmm0 - - pmaddubsw xmm5, xmm0 - punpcklbw xmm6, xmm7 - - pmaddubsw xmm6, xmm0 - paddw xmm1, [GLOBAL(rd)] - - paddw xmm2, [GLOBAL(rd)] - psraw xmm1, VP8_FILTER_SHIFT - - paddw xmm3, [GLOBAL(rd)] - psraw xmm2, VP8_FILTER_SHIFT - - paddw xmm4, [GLOBAL(rd)] - psraw xmm3, VP8_FILTER_SHIFT - - paddw xmm5, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - paddw xmm6, [GLOBAL(rd)] - psraw xmm5, VP8_FILTER_SHIFT - - psraw xmm6, VP8_FILTER_SHIFT - packuswb xmm1, xmm1 - - packuswb xmm2, xmm2 - movq [rdi], xmm1 - - packuswb xmm3, xmm3 - movq [rdi+rdx], xmm2 - - packuswb xmm4, xmm4 - movq xmm1, XMMWORD PTR [rsp+112] - - lea rdi, [rdi + 2*rdx] - movq xmm2, XMMWORD PTR [rsp+128] - - packuswb xmm5, xmm5 - movq [rdi], xmm3 - - packuswb xmm6, xmm6 - movq [rdi+rdx], xmm4 - - lea rdi, [rdi + 2*rdx] - punpcklbw xmm7, xmm1 - - movq [rdi], xmm5 - pmaddubsw xmm7, xmm0 - - movq [rdi+rdx], xmm6 - punpcklbw xmm1, xmm2 - - pmaddubsw xmm1, xmm0 - paddw xmm7, [GLOBAL(rd)] - - psraw xmm7, VP8_FILTER_SHIFT - paddw xmm1, [GLOBAL(rd)] - - psraw xmm1, VP8_FILTER_SHIFT - packuswb xmm7, xmm7 - - packuswb xmm1, xmm1 - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm7 - - movq [rdi+rdx], xmm1 - lea rsp, [rsp + 144] - - jmp .done8x8 - -.b8x8_fp_only: - lea rcx, [rdi+rdx*8] - -.next_row_fp: - movdqa xmm1, XMMWORD PTR [rsp] - movdqa xmm3, XMMWORD PTR [rsp+16] - - movdqa xmm2, xmm1 - movdqa xmm5, XMMWORD PTR [rsp+32] - - psrldq xmm2, 1 - movdqa xmm7, XMMWORD PTR [rsp+48] - - movdqa xmm4, xmm3 - psrldq xmm4, 1 - - movdqa xmm6, xmm5 - psrldq xmm6, 1 - - punpcklbw xmm1, xmm2 - pmaddubsw xmm1, xmm0 - - punpcklbw xmm3, xmm4 - pmaddubsw xmm3, xmm0 - - punpcklbw xmm5, xmm6 - pmaddubsw xmm5, xmm0 - - movdqa xmm2, xmm7 - psrldq xmm2, 1 - - punpcklbw xmm7, xmm2 - pmaddubsw xmm7, xmm0 - - paddw xmm1, [GLOBAL(rd)] - psraw xmm1, VP8_FILTER_SHIFT - - paddw xmm3, [GLOBAL(rd)] - psraw xmm3, VP8_FILTER_SHIFT - - paddw xmm5, [GLOBAL(rd)] - psraw xmm5, VP8_FILTER_SHIFT - - paddw xmm7, [GLOBAL(rd)] - psraw xmm7, VP8_FILTER_SHIFT - - packuswb xmm1, xmm1 - packuswb xmm3, xmm3 - - packuswb xmm5, xmm5 - movq [rdi], xmm1 - - packuswb xmm7, xmm7 - movq [rdi+rdx], xmm3 - - lea rdi, [rdi + 2*rdx] - movq [rdi], xmm5 - - lea rsp, [rsp + 4*16] - movq [rdi+rdx], xmm7 - - lea rdi, [rdi + 2*rdx] - cmp rdi, rcx - - jne .next_row_fp - - lea rsp, [rsp + 16] - -.done8x8: - ;add rsp, 144 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -shuf1b: - db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 -shuf2b: - db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11 -shuf3b: - db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10 - -align 16 -shuf2bfrom1: - db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13 -align 16 -shuf3bfrom1: - db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11 - -align 16 -rd: - times 8 dw 0x40 - -align 16 -k0_k5: - times 8 db 0, 0 ;placeholder - times 8 db 0, 0 - times 8 db 2, 1 - times 8 db 0, 0 - times 8 db 3, 3 - times 8 db 0, 0 - times 8 db 1, 2 - times 8 db 0, 0 -k1_k3: - times 8 db 0, 0 ;placeholder - times 8 db -6, 12 - times 8 db -11, 36 - times 8 db -9, 50 - times 8 db -16, 77 - times 8 db -6, 93 - times 8 db -8, 108 - times 8 db -1, 123 -k2_k4: - times 8 db 128, 0 ;placeholder - times 8 db 123, -1 - times 8 db 108, -8 - times 8 db 93, -6 - times 8 db 77, -16 - times 8 db 50, -9 - times 8 db 36, -11 - times 8 db 12, -6 -align 16 -vp8_bilinear_filters_ssse3: - times 8 db 128, 0 - times 8 db 112, 16 - times 8 db 96, 32 - times 8 db 80, 48 - times 8 db 64, 64 - times 8 db 48, 80 - times 8 db 32, 96 - times 8 db 16, 112 - diff --git a/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c b/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c deleted file mode 100644 index fb0b57eb1c..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c +++ /dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx_ports/mem.h" -#include "filter_x86.h" - -extern const short vp8_six_tap_mmx[8][6*8]; - -extern void vp8_filter_block1d_h6_mmx -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1dc_v6_mmx -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d8_h6_sse2 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d16_h6_sse2 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d8_v6_sse2 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int dst_ptich, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d16_v6_sse2 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int dst_ptich, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_unpack_block1d16_h6_sse2 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_height, - unsigned int output_width -); -extern void vp8_filter_block1d8_h6_only_sse2 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - int dst_ptich, - unsigned int output_height, - const short *vp8_filter -); -extern void vp8_filter_block1d16_h6_only_sse2 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - int dst_ptich, - unsigned int output_height, - const short *vp8_filter -); -extern void vp8_filter_block1d8_v6_only_sse2 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - int dst_ptich, - unsigned int output_height, - const short *vp8_filter -); - - -#if HAVE_MMX -void vp8_sixtap_predict4x4_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */ - const short *HFilter, *VFilter; - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter); - -} - - -void vp8_sixtap_predict16x16_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - - DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - - HFilter = vp8_six_tap_mmx[xoffset]; - - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter); - - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter); - -} - - -void vp8_sixtap_predict8x8_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter); - - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter); - -} - - -void vp8_sixtap_predict8x4_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter); - - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter); - -} - - - -void vp8_bilinear_predict16x16_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - vp8_bilinear_predict8x8_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pitch); - vp8_bilinear_predict8x8_mmx(src_ptr + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + 8, dst_pitch); - vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8, dst_pitch); - vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8 + 8, dst_pitch); -} -#endif - - -#if HAVE_SSE2 -void vp8_sixtap_predict16x16_sse2 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch - -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - if (xoffset) - { - if (yoffset) - { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); - } - else - { - /* First-pass only */ - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter); - } - } - else - { - /* Second-pass only */ - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32); - vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); - } -} - - -void vp8_sixtap_predict8x8_sse2 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - const short *HFilter, *VFilter; - - if (xoffset) - { - if (yoffset) - { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter); - } - else - { - /* First-pass only */ - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter); - } - } - else - { - /* Second-pass only */ - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter); - } -} - - -void vp8_sixtap_predict8x4_sse2 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - const short *HFilter, *VFilter; - - if (xoffset) - { - if (yoffset) - { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter); - } - else - { - /* First-pass only */ - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter); - } - } - else - { - /* Second-pass only */ - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter); - } -} - -#endif - -#if HAVE_SSSE3 - -extern void vp8_filter_block1d8_h6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d16_h6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d16_v6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d8_v6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d4_h6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d4_v6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -void vp8_sixtap_predict16x16_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch - -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[24*24]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, FData2, - 16, 21, xoffset); - vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, - 16, yoffset); - } - else - { - /* First-pass only */ - vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 16, xoffset); - } - } - else - { - if (yoffset) - { - /* Second-pass only */ - vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 16, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); - } - } -} - -void vp8_sixtap_predict8x8_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[256]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, FData2, - 8, 13, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, - 8, yoffset); - } - else - { - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 8, xoffset); - } - } - else - { - if (yoffset) - { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 8, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); - } - } -} - - -void vp8_sixtap_predict8x4_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[256]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, FData2, - 8, 9, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, - 4, yoffset); - } - else - { - /* First-pass only */ - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { - if (yoffset) - { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 4, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); - } - } -} - -void vp8_sixtap_predict4x4_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[4*9]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - FData2, 4, 9, xoffset); - vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, - 4, yoffset); - } - else - { - vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { - if (yoffset) - { - vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 4, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - int r; - - for (r = 0; r < 4; r++) - { - dst_ptr[0] = src_ptr[0]; - dst_ptr[1] = src_ptr[1]; - dst_ptr[2] = src_ptr[2]; - dst_ptr[3] = src_ptr[3]; - dst_ptr += dst_pitch; - src_ptr += src_pixels_per_line; - } - } - } -} - -#endif diff --git a/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm b/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm deleted file mode 100644 index 88a07b9f3f..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm +++ /dev/null @@ -1,1753 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_loop_filter_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE -sym(vp8_loop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - movsxd rcx, dword ptr arg(5) ;count -.next8_h: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - ; calculate breakout conditions - movq mm2, [rdi+2*rax] ; q3 - movq mm1, [rsi+2*rax] ; q2 - movq mm6, mm1 ; q2 - psubusb mm1, mm2 ; q2-=q3 - psubusb mm2, mm6 ; q3-=q2 - por mm1, mm2 ; abs(q3-q2) - psubusb mm1, mm7 ; - - - movq mm4, [rsi+rax] ; q1 - movq mm3, mm4 ; q1 - psubusb mm4, mm6 ; q1-=q2 - psubusb mm6, mm3 ; q2-=q1 - por mm4, mm6 ; abs(q2-q1) - - psubusb mm4, mm7 - por mm1, mm4 - - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - psubusb mm4, mm3 ; q0-=q1 - psubusb mm3, mm0 ; q1-=q0 - por mm4, mm3 ; abs(q0-q1) - movq t0, mm4 ; save to t0 - psubusb mm4, mm7 - por mm1, mm4 - - - neg rax ; negate pitch to deal with above border - - movq mm2, [rsi+4*rax] ; p3 - movq mm4, [rdi+4*rax] ; p2 - movq mm5, mm4 ; p2 - psubusb mm4, mm2 ; p2-=p3 - psubusb mm2, mm5 ; p3-=p2 - por mm4, mm2 ; abs(p3 - p2) - psubusb mm4, mm7 - por mm1, mm4 - - - movq mm4, [rsi+2*rax] ; p1 - movq mm3, mm4 ; p1 - psubusb mm4, mm5 ; p1-=p2 - psubusb mm5, mm3 ; p2-=p1 - por mm4, mm5 ; abs(p2 - p1) - psubusb mm4, mm7 - por mm1, mm4 - - movq mm2, mm3 ; p1 - - movq mm4, [rsi+rax] ; p0 - movq mm5, mm4 ; p0 - psubusb mm4, mm3 ; p0-=p1 - psubusb mm3, mm5 ; p1-=p0 - por mm4, mm3 ; abs(p1 - p0) - movq t1, mm4 ; save to t1 - psubusb mm4, mm7 - por mm1, mm4 - - movq mm3, [rdi] ; q1 - movq mm4, mm3 ; q1 - psubusb mm3, mm2 ; q1-=p1 - psubusb mm2, mm4 ; p1-=q1 - por mm2, mm3 ; abs(p1-q1) - pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm2, 1 ; abs(p1-q1)/2 - - movq mm6, mm5 ; p0 - movq mm3, [rsi] ; q0 - psubusb mm5, mm3 ; p0-=q0 - psubusb mm3, mm6 ; q0-=p0 - por mm5, mm3 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] ; blimit - - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm5 - pxor mm5, mm5 - pcmpeqb mm1, mm5 ; mask mm1 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb mm4, mm5 - - pcmpeqb mm5, mm5 - pxor mm4, mm5 - - - ; start work on filters - movq mm2, [rsi+2*rax] ; p1 - movq mm7, [rdi] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - movq mm2, mm1 - paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - - pxor mm0, mm0 ; - pxor mm5, mm5 - punpcklbw mm0, mm2 ; - punpckhbw mm5, mm2 ; - psraw mm0, 11 ; - psraw mm5, 11 - packsswb mm0, mm5 - movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor mm0, mm0 ; 0 - movq mm5, mm1 ; abcdefgh - punpcklbw mm0, mm1 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - pxor mm1, mm1 ; 0 - punpckhbw mm1, mm5 ; a0b0c0d0 - psraw mm1, 11 ; sign extended shift right by 3 - movq mm5, mm0 ; save results - - packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [GLOBAL(ones)] - paddsw mm1, [GLOBAL(ones)] - psraw mm5, 1 ; partial shifted one more time for 2nd tap - psraw mm1, 1 ; partial shifted one more time for 2nd tap - packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - pandn mm4, mm5 ; high edge variance additive - - paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+rax], mm6 ; write back - - movq mm6, [rsi+2*rax] ; p1 - pxor mm6, [GLOBAL(t80)] ; reoffset - paddsb mm6, mm4 ; p1+= p1 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+2*rax], mm6 ; write back - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - movq [rsi], mm3 ; write back - - psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [GLOBAL(t80)] ; unoffset - movq [rdi], mm7 ; write back - - add rsi,8 - neg rax - dec rcx - jnz .next8_h - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE -sym(vp8_loop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 64 ; reserve 64 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[32]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4 - 4] - - movsxd rcx, dword ptr arg(5) ;count -.next8_v: - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - - ;transpose - movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - movq mm7, mm6 ; 77 76 75 74 73 72 71 70 - - punpckhbw mm7, [rdi+2*rax] ; 77 67 76 66 75 65 74 64 - punpcklbw mm6, [rdi+2*rax] ; 73 63 72 62 71 61 70 60 - - movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 - movq mm5, mm4 ; 47 46 45 44 43 42 41 40 - - punpckhbw mm5, [rsi+rax] ; 57 47 56 46 55 45 54 44 - punpcklbw mm4, [rsi+rax] ; 53 43 52 42 51 41 50 40 - - movq mm3, mm5 ; 57 47 56 46 55 45 54 44 - punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 - - punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 - movq mm2, mm4 ; 53 43 52 42 51 41 50 40 - - punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 - punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 - - neg rax - movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq mm1, mm6 ; 27 26 25 24 23 22 21 20 - punpckhbw mm6, [rsi+rax] ; 37 27 36 36 35 25 34 24 - - punpcklbw mm1, [rsi+rax] ; 33 23 32 22 31 21 30 20 - movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 - - punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 - movq mm0, mm7 ; 17 07 16 06 15 05 14 04 - - punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 - punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 - - movq mm6, mm7 ; 37 27 17 07 36 26 16 06 - punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 - - punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 - - movq mm5, mm6 ; 76 66 56 46 36 26 16 06 - psubusb mm5, mm7 ; q2-q3 - - psubusb mm7, mm6 ; q3-q2 - por mm7, mm5; ; mm7=abs (q3-q2) - - movq mm5, mm0 ; 35 25 15 05 34 24 14 04 - punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 - - punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 - movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 - - psubusb mm3, mm6 ; q1-q2 - psubusb mm6, mm5 ; q2-q1 - - por mm6, mm3 ; mm6=abs(q2-q1) - lea rdx, srct - - movq [rdx+24], mm5 ; save q1 - movq [rdx+16], mm0 ; save q0 - - movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00 - punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 - - movq mm0, mm3 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 31 21 11 01 30 20 10 00 - - punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 - punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 - - movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 - psubusb mm2, mm0 ; p2-p3 - - psubusb mm0, mm1 ; p3-p2 - por mm0, mm2 ; mm0=abs(p3-p2) - - movq mm2, mm3 ; 33 23 13 03 32 22 12 02 - punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 - - punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 - movq [rdx+8], mm3 ; save p0 - - movq [rdx], mm2 ; save p1 - movq mm5, mm2 ; mm5 = p1 - - psubusb mm2, mm1 ; p1-p2 - psubusb mm1, mm5 ; p2-p1 - - por mm1, mm2 ; mm1=abs(p2-p1) - mov rdx, arg(3) ;limit - - movq mm4, [rdx] ; mm4 = limit - psubusb mm7, mm4 - - psubusb mm0, mm4 - psubusb mm1, mm4 - - psubusb mm6, mm4 - por mm7, mm6 - - por mm0, mm1 - por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movq mm1, mm5 ; p1 - - movq mm7, mm3 ; mm3=mm7=p0 - psubusb mm7, mm5 ; p0 - p1 - - psubusb mm5, mm3 ; p1 - p0 - por mm5, mm7 ; abs(p1-p0) - - movq t0, mm5 ; save abs(p1-p0) - lea rdx, srct - - psubusb mm5, mm4 - por mm0, mm5 ; mm0=mask - - movq mm5, [rdx+16] ; mm5=q0 - movq mm7, [rdx+24] ; mm7=q1 - - movq mm6, mm5 ; mm6=q0 - movq mm2, mm7 ; q1 - psubusb mm5, mm7 ; q0-q1 - - psubusb mm7, mm6 ; q1-q0 - por mm7, mm5 ; abs(q1-q0) - - movq t1, mm7 ; save abs(q1-q0) - psubusb mm7, mm4 - - por mm0, mm7 ; mask - - movq mm5, mm2 ; q1 - psubusb mm5, mm1 ; q1-=p1 - psubusb mm1, mm2 ; p1-=q1 - por mm5, mm1 ; abs(p1-q1) - pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; - - movq mm4, [rdx] ;blimit - movq mm1, mm3 ; mm1=mm3=p0 - - movq mm7, mm6 ; mm7=mm6=q0 - psubusb mm1, mm7 ; p0-q0 - - psubusb mm7, mm3 ; q0-p0 - por mm1, mm7 ; abs(q0-p0) - paddusb mm1, mm1 ; abs(q0-p0)*2 - paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm0; ; mask - - pxor mm0, mm0 - pcmpeqb mm1, mm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] - ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - - por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb mm4, mm0 - - pcmpeqb mm0, mm0 - pxor mm4, mm0 - - - - ; start work on filters - lea rdx, srct - - movq mm2, [rdx] ; p1 - movq mm7, [rdx+24] ; q1 - - movq mm6, [rdx+8] ; p0 - movq mm0, [rdx+16] ; q0 - - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb mm2, mm7 ; p1 - q1 - pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - - paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - - paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - movq mm2, mm1 - paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - - paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - pxor mm0, mm0 ; - - pxor mm5, mm5 - punpcklbw mm0, mm2 ; - - punpckhbw mm5, mm2 ; - psraw mm0, 11 ; - - psraw mm5, 11 - packsswb mm0, mm5 - - movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor mm0, mm0 ; 0 - movq mm5, mm1 ; abcdefgh - - punpcklbw mm0, mm1 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - - pxor mm1, mm1 ; 0 - punpckhbw mm1, mm5 ; a0b0c0d0 - - psraw mm1, 11 ; sign extended shift right by 3 - movq mm5, mm0 ; save results - - packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [GLOBAL(ones)] - - paddsw mm1, [GLOBAL(ones)] - psraw mm5, 1 ; partial shifted one more time for 2nd tap - - psraw mm1, 1 ; partial shifted one more time for 2nd tap - packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - - pandn mm4, mm5 ; high edge variance additive - - paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - - ; mm6=p0 ; - movq mm1, [rdx] ; p1 - pxor mm1, [GLOBAL(t80)] ; reoffset - - paddsb mm1, mm4 ; p1+= p1 add - pxor mm1, [GLOBAL(t80)] ; unoffset - ; mm6 = p0 mm1 = p1 - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - - ; mm3 = q0 - psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [GLOBAL(t80)] ; unoffset - ; mm7 = q1 - - ; transpose and write back - ; mm1 = 72 62 52 42 32 22 12 02 - ; mm6 = 73 63 53 43 33 23 13 03 - ; mm3 = 74 64 54 44 34 24 14 04 - ; mm7 = 75 65 55 45 35 25 15 05 - - movq mm2, mm1 ; 72 62 52 42 32 22 12 02 - punpcklbw mm2, mm6 ; 33 32 23 22 13 12 03 02 - - movq mm4, mm3 ; 74 64 54 44 34 24 14 04 - punpckhbw mm1, mm6 ; 73 72 63 62 53 52 43 42 - - punpcklbw mm4, mm7 ; 35 34 25 24 15 14 05 04 - punpckhbw mm3, mm7 ; 75 74 65 64 55 54 45 44 - - movq mm6, mm2 ; 33 32 23 22 13 12 03 02 - punpcklwd mm2, mm4 ; 15 14 13 12 05 04 03 02 - - punpckhwd mm6, mm4 ; 35 34 33 32 25 24 23 22 - movq mm5, mm1 ; 73 72 63 62 53 52 43 42 - - punpcklwd mm1, mm3 ; 55 54 53 52 45 44 43 42 - punpckhwd mm5, mm3 ; 75 74 73 72 65 64 63 62 - - - ; mm2 = 15 14 13 12 05 04 03 02 - ; mm6 = 35 34 33 32 25 24 23 22 - ; mm5 = 55 54 53 52 45 44 43 42 - ; mm1 = 75 74 73 72 65 64 63 62 - - - - movd [rsi+rax*4+2], mm2 - psrlq mm2, 32 - - movd [rdi+rax*4+2], mm2 - movd [rsi+rax*2+2], mm6 - - psrlq mm6, 32 - movd [rsi+rax+2],mm6 - - movd [rsi+2], mm1 - psrlq mm1, 32 - - movd [rdi+2], mm1 - neg rax - - movd [rdi+rax+2],mm5 - psrlq mm5, 32 - - movd [rdi+rax*2+2], mm5 - - lea rsi, [rsi+rax*8] - dec rcx - jnz .next8_v - - add rsp, 64 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE -sym(vp8_mbloop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - movsxd rcx, dword ptr arg(5) ;count -.next8_mbh: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - ; calculate breakout conditions - movq mm2, [rdi+2*rax] ; q3 - - movq mm1, [rsi+2*rax] ; q2 - movq mm6, mm1 ; q2 - psubusb mm1, mm2 ; q2-=q3 - psubusb mm2, mm6 ; q3-=q2 - por mm1, mm2 ; abs(q3-q2) - psubusb mm1, mm7 - - - ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit - movq mm4, [rsi+rax] ; q1 - movq mm3, mm4 ; q1 - psubusb mm4, mm6 ; q1-=q2 - psubusb mm6, mm3 ; q2-=q1 - por mm4, mm6 ; abs(q2-q1) - psubusb mm4, mm7 - por mm1, mm4 - - - ; mm1 = mask, mm3=q1, mm7 = limit - - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - psubusb mm4, mm3 ; q0-=q1 - psubusb mm3, mm0 ; q1-=q0 - por mm4, mm3 ; abs(q0-q1) - movq t0, mm4 ; save to t0 - psubusb mm4, mm7 - por mm1, mm4 - - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - neg rax ; negate pitch to deal with above border - - movq mm2, [rsi+4*rax] ; p3 - movq mm4, [rdi+4*rax] ; p2 - movq mm5, mm4 ; p2 - psubusb mm4, mm2 ; p2-=p3 - psubusb mm2, mm5 ; p3-=p2 - por mm4, mm2 ; abs(p3 - p2) - psubusb mm4, mm7 - por mm1, mm4 - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - movq mm4, [rsi+2*rax] ; p1 - movq mm3, mm4 ; p1 - psubusb mm4, mm5 ; p1-=p2 - psubusb mm5, mm3 ; p2-=p1 - por mm4, mm5 ; abs(p2 - p1) - psubusb mm4, mm7 - por mm1, mm4 - - movq mm2, mm3 ; p1 - - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - movq mm4, [rsi+rax] ; p0 - movq mm5, mm4 ; p0 - psubusb mm4, mm3 ; p0-=p1 - psubusb mm3, mm5 ; p1-=p0 - por mm4, mm3 ; abs(p1 - p0) - movq t1, mm4 ; save to t1 - psubusb mm4, mm7 - por mm1, mm4 - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm5 = p0 - movq mm3, [rdi] ; q1 - movq mm4, mm3 ; q1 - psubusb mm3, mm2 ; q1-=p1 - psubusb mm2, mm4 ; p1-=q1 - por mm2, mm3 ; abs(p1-q1) - pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm2, 1 ; abs(p1-q1)/2 - - movq mm6, mm5 ; p0 - movq mm3, mm0 ; q0 - psubusb mm5, mm3 ; p0-=q0 - psubusb mm3, mm6 ; q0-=p0 - por mm5, mm3 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] ; blimit - - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm5 - pxor mm5, mm5 - pcmpeqb mm1, mm5 ; mask mm1 - - ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb mm4, mm5 - - pcmpeqb mm5, mm5 - pxor mm4, mm5 - - - - ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, mm4=hev - ; start work on filters - movq mm2, [rsi+2*rax] ; p1 - movq mm7, [rdi] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) - paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movq mm2, mm1 ; vp8_filter - pand mm2, mm4; ; Filter2 = vp8_filter & hev - - movq mm5, mm2 ; - paddsb mm5, [GLOBAL(t3)]; - - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm5 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm5 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - movq mm5, mm0 ; Filter2 - - paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm2 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm2 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb mm3, mm0 ; qs0 =qs0 - filter1 - paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn mm4, mm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor mm0, mm0 - - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s27)] - pmulhw mm2, [GLOBAL(s27)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - psubsb mm3, mm1 - paddsb mm6, mm1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - movq [rsi+rax], mm6 - movq [rsi], mm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s18)] - pmulhw mm2, [GLOBAL(s18)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm3, [rdi] - movq mm6, [rsi+rax*2] ; p1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdi], mm3 - movq [rsi+rax*2], mm6 - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s9)] - pmulhw mm2, [GLOBAL(s9)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - - movq mm6, [rdi+rax*4] - neg rax - movq mm3, [rdi+rax ] - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdi+rax ], mm3 - neg rax - movq [rdi+rax*4], mm6 - -;EARLY_BREAK_OUT: - neg rax - add rsi,8 - dec rcx - jnz .next8_mbh - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE -sym(vp8_mbloop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 96 ; reserve 96 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[64]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4 - 4] - - movsxd rcx, dword ptr arg(5) ;count -.next8_mbv: - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - - ;transpose - movq mm0, [rdi+2*rax] ; 77 76 75 74 73 72 71 70 - movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - - movq mm7, mm6 ; 77 76 75 74 73 72 71 70 - punpckhbw mm7, mm0 ; 77 67 76 66 75 65 74 64 - - punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 - movq mm0, [rsi+rax] ; 57 56 55 54 53 52 51 50 - - movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 - movq mm5, mm4 ; 47 46 45 44 43 42 41 40 - - punpckhbw mm5, mm0 ; 57 47 56 46 55 45 54 44 - punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 - - movq mm3, mm5 ; 57 47 56 46 55 45 54 44 - punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 - - punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 - movq mm2, mm4 ; 53 43 52 42 51 41 50 40 - - punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 - punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 - - neg rax - - movq mm7, [rsi+rax] ; 37 36 35 34 33 32 31 30 - movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq mm1, mm6 ; 27 26 25 24 23 22 21 20 - punpckhbw mm6, mm7 ; 37 27 36 36 35 25 34 24 - - punpcklbw mm1, mm7 ; 33 23 32 22 31 21 30 20 - - movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 - punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 - - movq mm0, mm7 ; 17 07 16 06 15 05 14 04 - punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 - - punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 - movq mm6, mm7 ; 37 27 17 07 36 26 16 06 - - punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 - punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 - - lea rdx, srct - movq mm5, mm6 ; 76 66 56 46 36 26 16 06 - - movq [rdx+56], mm7 - psubusb mm5, mm7 ; q2-q3 - - - movq [rdx+48], mm6 - psubusb mm7, mm6 ; q3-q2 - - por mm7, mm5; ; mm7=abs (q3-q2) - movq mm5, mm0 ; 35 25 15 05 34 24 14 04 - - punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 - punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 - - movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 - psubusb mm3, mm6 ; q1-q2 - - psubusb mm6, mm5 ; q2-q1 - por mm6, mm3 ; mm6=abs(q2-q1) - - movq [rdx+40], mm5 ; save q1 - movq [rdx+32], mm0 ; save q0 - - movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00 - punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 - - movq mm0, mm3 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 31 21 11 01 30 20 10 00 - - punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 - punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 - - movq [rdx], mm0 ; save p3 - movq [rdx+8], mm1 ; save p2 - - movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 - psubusb mm2, mm0 ; p2-p3 - - psubusb mm0, mm1 ; p3-p2 - por mm0, mm2 ; mm0=abs(p3-p2) - - movq mm2, mm3 ; 33 23 13 03 32 22 12 02 - punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 - - punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 - movq [rdx+24], mm3 ; save p0 - - movq [rdx+16], mm2 ; save p1 - movq mm5, mm2 ; mm5 = p1 - - psubusb mm2, mm1 ; p1-p2 - psubusb mm1, mm5 ; p2-p1 - - por mm1, mm2 ; mm1=abs(p2-p1) - mov rdx, arg(3) ;limit - - movq mm4, [rdx] ; mm4 = limit - psubusb mm7, mm4 ; abs(q3-q2) > limit - - psubusb mm0, mm4 ; abs(p3-p2) > limit - psubusb mm1, mm4 ; abs(p2-p1) > limit - - psubusb mm6, mm4 ; abs(q2-q1) > limit - por mm7, mm6 ; or - - por mm0, mm1 ; - por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movq mm1, mm5 ; p1 - - movq mm7, mm3 ; mm3=mm7=p0 - psubusb mm7, mm5 ; p0 - p1 - - psubusb mm5, mm3 ; p1 - p0 - por mm5, mm7 ; abs(p1-p0) - - movq t0, mm5 ; save abs(p1-p0) - lea rdx, srct - - psubusb mm5, mm4 ; mm5 = abs(p1-p0) > limit - por mm0, mm5 ; mm0=mask - - movq mm5, [rdx+32] ; mm5=q0 - movq mm7, [rdx+40] ; mm7=q1 - - movq mm6, mm5 ; mm6=q0 - movq mm2, mm7 ; q1 - psubusb mm5, mm7 ; q0-q1 - - psubusb mm7, mm6 ; q1-q0 - por mm7, mm5 ; abs(q1-q0) - - movq t1, mm7 ; save abs(q1-q0) - psubusb mm7, mm4 ; mm7=abs(q1-q0)> limit - - por mm0, mm7 ; mask - - movq mm5, mm2 ; q1 - psubusb mm5, mm1 ; q1-=p1 - psubusb mm1, mm2 ; p1-=q1 - por mm5, mm1 ; abs(p1-q1) - pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; - - movq mm4, [rdx] ;blimit - movq mm1, mm3 ; mm1=mm3=p0 - - movq mm7, mm6 ; mm7=mm6=q0 - psubusb mm1, mm7 ; p0-q0 - - psubusb mm7, mm3 ; q0-p0 - por mm1, mm7 ; abs(q0-p0) - paddusb mm1, mm1 ; abs(q0-p0)*2 - paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm0; ; mask - - pxor mm0, mm0 - pcmpeqb mm1, mm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] - ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 ; abs(q1 - q0) > thresh - - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 ; abs(p1 - p0)> thresh - - por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb mm4, mm0 - - pcmpeqb mm0, mm0 - pxor mm4, mm0 - - - - - ; start work on filters - lea rdx, srct - - ; start work on filters - movq mm2, [rdx+16] ; p1 - movq mm7, [rdx+40] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - movq mm6, [rdx+24] ; p0 - movq mm0, [rdx+32] ; q0 - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) - paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movq mm2, mm1 ; vp8_filter - pand mm2, mm4; ; Filter2 = vp8_filter & hev - - movq mm5, mm2 ; - paddsb mm5, [GLOBAL(t3)]; - - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm5 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm5 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - movq mm5, mm0 ; Filter2 - - paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm2 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm2 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb mm3, mm0 ; qs0 =qs0 - filter1 - paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn mm4, mm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor mm0, mm0 - - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s27)] - pmulhw mm2, [GLOBAL(s27)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - psubsb mm3, mm1 - paddsb mm6, mm1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - movq [rdx+24], mm6 - movq [rdx+32], mm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s18)] - pmulhw mm2, [GLOBAL(s18)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm3, [rdx + 40] - movq mm6, [rdx + 16] ; p1 - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdx + 40], mm3 - movq [rdx + 16], mm6 - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s9)] - pmulhw mm2, [GLOBAL(s9)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm6, [rdx+ 8] - movq mm3, [rdx+48] - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01 - pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06 - - ; transpose and write back - movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00 - movq mm1, mm0 ; mm0 = 70 60 50 40 30 20 10 00 - - punpcklbw mm0, mm6 ; mm0 = 31 30 21 20 11 10 01 00 - punpckhbw mm1, mm6 ; mm3 = 71 70 61 60 51 50 41 40 - - movq mm2, [rdx+16] ; mm2 = 72 62 52 42 32 22 12 02 - movq mm6, mm2 ; mm3 = 72 62 52 42 32 22 12 02 - - punpcklbw mm2, [rdx+24] ; mm2 = 33 32 23 22 13 12 03 02 - punpckhbw mm6, [rdx+24] ; mm3 = 73 72 63 62 53 52 43 42 - - movq mm5, mm0 ; mm5 = 31 30 21 20 11 10 01 00 - punpcklwd mm0, mm2 ; mm0 = 13 12 11 10 03 02 01 00 - - punpckhwd mm5, mm2 ; mm5 = 33 32 31 30 23 22 21 20 - movq mm4, mm1 ; mm4 = 71 70 61 60 51 50 41 40 - - punpcklwd mm1, mm6 ; mm1 = 53 52 51 50 43 42 41 40 - punpckhwd mm4, mm6 ; mm4 = 73 72 71 70 63 62 61 60 - - movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 - punpcklbw mm2, [rdx+40] ; mm2 = 35 34 25 24 15 14 05 04 - - movq mm6, mm3 ; mm6 = 76 66 56 46 36 26 15 06 - punpcklbw mm6, [rdx+56] ; mm6 = 37 36 27 26 17 16 07 06 - - movq mm7, mm2 ; mm7 = 35 34 25 24 15 14 05 04 - punpcklwd mm2, mm6 ; mm2 = 17 16 15 14 07 06 05 04 - - punpckhwd mm7, mm6 ; mm7 = 37 36 35 34 27 26 25 24 - movq mm6, mm0 ; mm6 = 13 12 11 10 03 02 01 00 - - punpckldq mm0, mm2 ; mm0 = 07 06 05 04 03 02 01 00 - punpckhdq mm6, mm2 ; mm6 = 17 16 15 14 13 12 11 10 - - movq [rsi+rax*4], mm0 ; write out - movq [rdi+rax*4], mm6 ; write out - - movq mm0, mm5 ; mm0 = 33 32 31 30 23 22 21 20 - punpckldq mm0, mm7 ; mm0 = 27 26 25 24 23 22 20 20 - - punpckhdq mm5, mm7 ; mm5 = 37 36 35 34 33 32 31 30 - movq [rsi+rax*2], mm0 ; write out - - movq [rdi+rax*2], mm5 ; write out - movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 - - punpckhbw mm2, [rdx+40] ; mm2 = 75 74 65 64 54 54 45 44 - punpckhbw mm3, [rdx+56] ; mm3 = 77 76 67 66 57 56 47 46 - - movq mm5, mm2 ; mm5 = 75 74 65 64 54 54 45 44 - punpcklwd mm2, mm3 ; mm2 = 57 56 55 54 47 46 45 44 - - punpckhwd mm5, mm3 ; mm5 = 77 76 75 74 67 66 65 64 - movq mm0, mm1 ; mm0= 53 52 51 50 43 42 41 40 - - movq mm3, mm4 ; mm4 = 73 72 71 70 63 62 61 60 - punpckldq mm0, mm2 ; mm0 = 47 46 45 44 43 42 41 40 - - punpckhdq mm1, mm2 ; mm1 = 57 56 55 54 53 52 51 50 - movq [rsi], mm0 ; write out - - movq [rdi], mm1 ; write out - neg rax - - punpckldq mm3, mm5 ; mm3 = 67 66 65 64 63 62 61 60 - punpckhdq mm4, mm5 ; mm4 = 77 76 75 74 73 72 71 60 - - movq [rsi+rax*2], mm3 - movq [rdi+rax*2], mm4 - - lea rsi, [rsi+rax*8] - dec rcx - - jnz .next8_mbv - - add rsp, 96 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit -;) -global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE -sym(vp8_loop_filter_simple_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - mov rcx, 2 ; count -.nexts8_h: - mov rdx, arg(2) ;blimit ; get blimit - movq mm3, [rdx] ; - - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - neg rax - - ; calculate mask - movq mm1, [rsi+2*rax] ; p1 - movq mm0, [rdi] ; q1 - movq mm2, mm1 - movq mm7, mm0 - movq mm4, mm0 - psubusb mm0, mm1 ; q1-=p1 - psubusb mm1, mm4 ; p1-=q1 - por mm1, mm0 ; abs(p1-q1) - pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm1, 1 ; abs(p1-q1)/2 - - movq mm5, [rsi+rax] ; p0 - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - movq mm6, mm5 ; p0 - psubusb mm5, mm4 ; p0-=q0 - psubusb mm4, mm6 ; q0-=p0 - por mm5, mm4 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor mm3, mm3 - pcmpeqb mm5, mm3 - - ; start work on filters - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0) - paddsb mm2, mm0 ; p1 - q1 + 2 * (q0 - p0) - paddsb mm2, mm0 ; p1 - q1 + 3 * (q0 - p0) - pand mm5, mm2 ; mask filter values we don't care about - - ; do + 4 side - paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - movq mm1, mm5 ; get a copy of filters - psraw mm1, 11 ; arithmetic shift right 11 - psllw mm1, 8 ; shift left 8 to put it back - - por mm0, mm1 ; put the two together to get result - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - movq [rsi], mm3 ; write back - - - ; now do +3 side - psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - psraw mm5, 11 ; arithmetic shift right 11 - psllw mm5, 8 ; shift left 8 to put it back - por mm0, mm5 ; put the two together to get result - - - paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+rax], mm6 ; write back - - add rsi,8 - neg rax - dec rcx - jnz .nexts8_h - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit -;) -global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE -sym(vp8_loop_filter_simple_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4- 2]; ; - mov rcx, 2 ; count -.nexts8_v: - - lea rdi, [rsi + rax]; - movd mm0, [rdi + rax * 2] ; xx xx xx xx 73 72 71 70 - - movd mm6, [rsi + rax * 2] ; xx xx xx xx 63 62 61 60 - punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 - - movd mm0, [rsi + rax] ; xx xx xx xx 53 52 51 50 - movd mm4, [rsi] ; xx xx xx xx 43 42 41 40 - - punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 - movq mm5, mm4 ; 53 43 52 42 51 41 50 40 - - punpcklwd mm4, mm6 ; 71 61 51 41 70 60 50 40 - punpckhwd mm5, mm6 ; 73 63 53 43 72 62 52 42 - - neg rax - - movd mm7, [rsi + rax] ; xx xx xx xx 33 32 31 30 - movd mm6, [rsi + rax * 2] ; xx xx xx xx 23 22 21 20 - - punpcklbw mm6, mm7 ; 33 23 32 22 31 21 30 20 - movd mm1, [rdi + rax * 4] ; xx xx xx xx 13 12 11 10 - - movd mm0, [rsi + rax * 4] ; xx xx xx xx 03 02 01 00 - punpcklbw mm0, mm1 ; 13 03 12 02 11 01 10 00 - - movq mm2, mm0 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm6 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm2, mm6 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 13 03 12 02 11 01 10 00 - - punpckldq mm0, mm4 ; 70 60 50 40 30 20 10 00 = p1 - movq mm3, mm2 ; 33 23 13 03 32 22 12 02 - - punpckhdq mm1, mm4 ; 71 61 51 41 31 21 11 01 = p0 - punpckldq mm2, mm5 ; 72 62 52 42 32 22 12 02 = q0 - - punpckhdq mm3, mm5 ; 73 63 53 43 33 23 13 03 = q1 - - - ; calculate mask - movq mm6, mm0 ; p1 - movq mm7, mm3 ; q1 - psubusb mm7, mm6 ; q1-=p1 - psubusb mm6, mm3 ; p1-=q1 - por mm6, mm7 ; abs(p1-q1) - pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm6, 1 ; abs(p1-q1)/2 - - movq mm5, mm1 ; p0 - movq mm4, mm2 ; q0 - - psubusb mm5, mm2 ; p0-=q0 - psubusb mm4, mm1 ; q0-=p0 - - por mm5, mm4 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] - - psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor mm7, mm7 - pcmpeqb mm5, mm7 ; mm5 = mask - - ; start work on filters - movq t0, mm0 - movq t1, mm3 - - pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb mm0, mm3 ; p1 - q1 - movq mm6, mm1 ; p0 - - movq mm7, mm2 ; q0 - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - - pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm7 ; offseted ; q0 - - psubsb mm7, mm6 ; q0 - p0 - paddsb mm0, mm7 ; p1 - q1 + 1 * (q0 - p0) - - paddsb mm0, mm7 ; p1 - q1 + 2 * (q0 - p0) - paddsb mm0, mm7 ; p1 - q1 + 3 * (q0 - p0) - - pand mm5, mm0 ; mask filter values we don't care about - - paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - - movq mm7, mm5 ; get a copy of filters - psraw mm7, 11 ; arithmetic shift right 11 - psllw mm7, 8 ; shift left 8 to put it back - - por mm0, mm7 ; put the two together to get result - - psubsb mm3, mm0 ; q0-= q0sz add - pxor mm3, [GLOBAL(t80)] ; unoffset - - ; now do +3 side - psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - - psraw mm5, 11 ; arithmetic shift right 11 - psllw mm5, 8 ; shift left 8 to put it back - por mm0, mm5 ; put the two together to get result - - paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - - - movq mm0, t0 - movq mm4, t1 - - ; mm0 = 70 60 50 40 30 20 10 00 - ; mm6 = 71 61 51 41 31 21 11 01 - ; mm3 = 72 62 52 42 32 22 12 02 - ; mm4 = 73 63 53 43 33 23 13 03 - ; transpose back to write out - - movq mm1, mm0 ; - punpcklbw mm0, mm6 ; 31 30 21 20 11 10 01 00 - - punpckhbw mm1, mm6 ; 71 70 61 60 51 50 41 40 - movq mm2, mm3 ; - - punpcklbw mm2, mm4 ; 33 32 23 22 13 12 03 02 - movq mm5, mm1 ; 71 70 61 60 51 50 41 40 - - punpckhbw mm3, mm4 ; 73 72 63 62 53 52 43 42 - movq mm6, mm0 ; 31 30 21 20 11 10 01 00 - - punpcklwd mm0, mm2 ; 13 12 11 10 03 02 01 00 - punpckhwd mm6, mm2 ; 33 32 31 30 23 22 21 20 - - movd [rsi+rax*4], mm0 ; write 03 02 01 00 - punpcklwd mm1, mm3 ; 53 52 51 50 43 42 41 40 - - psrlq mm0, 32 ; xx xx xx xx 13 12 11 10 - punpckhwd mm5, mm3 ; 73 72 71 70 63 62 61 60 - - movd [rdi+rax*4], mm0 ; write 13 12 11 10 - movd [rsi+rax*2], mm6 ; write 23 22 21 20 - - psrlq mm6, 32 ; 33 32 31 30 - movd [rsi], mm1 ; write 43 42 41 40 - - movd [rsi + rax], mm6 ; write 33 32 31 30 - neg rax - - movd [rsi + rax*2], mm5 ; write 63 62 61 60 - psrlq mm1, 32 ; 53 52 51 50 - - movd [rdi], mm1 ; write out 53 52 51 50 - psrlq mm5, 32 ; 73 72 71 70 - - movd [rdi + rax*2], mm5 ; write 73 72 71 70 - - lea rsi, [rsi+rax*8] ; next 8 - - dec rcx - jnz .nexts8_v - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - -;void fast_loop_filter_vertical_edges_mmx(unsigned char *y_ptr, -; int y_stride, -; loop_filter_info *lfi) -;{ -; -; -; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+4, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+8, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+12, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -;} - -SECTION_RODATA -align 16 -tfe: - times 8 db 0xfe -align 16 -t80: - times 8 db 0x80 -align 16 -t1s: - times 8 db 0x01 -align 16 -t3: - times 8 db 0x03 -align 16 -t4: - times 8 db 0x04 -align 16 -ones: - times 4 dw 0x0001 -align 16 -s27: - times 4 dw 0x1b00 -align 16 -s18: - times 4 dw 0x1200 -align 16 -s9: - times 4 dw 0x0900 -align 16 -s63: - times 4 dw 0x003f diff --git a/thirdparty/libvpx/vp8/decoder/dboolhuff.c b/thirdparty/libvpx/vp8/decoder/dboolhuff.c deleted file mode 100644 index 5cdd2a2491..0000000000 --- a/thirdparty/libvpx/vp8/decoder/dboolhuff.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "dboolhuff.h" -#include "vp8/common/common.h" -#include "vpx_dsp/vpx_dsp_common.h" - -int vp8dx_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) -{ - br->user_buffer_end = source+source_sz; - br->user_buffer = source; - br->value = 0; - br->count = -8; - br->range = 255; - br->decrypt_cb = decrypt_cb; - br->decrypt_state = decrypt_state; - - if (source_sz && !source) - return 1; - - /* Populate the buffer */ - vp8dx_bool_decoder_fill(br); - - return 0; -} - -void vp8dx_bool_decoder_fill(BOOL_DECODER *br) -{ - const unsigned char *bufptr = br->user_buffer; - VP8_BD_VALUE value = br->value; - int count = br->count; - int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); - size_t bytes_left = br->user_buffer_end - bufptr; - size_t bits_left = bytes_left * CHAR_BIT; - int x = shift + CHAR_BIT - (int)bits_left; - int loop_end = 0; - unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1]; - - if (br->decrypt_cb) { - size_t n = VPXMIN(sizeof(decrypted), bytes_left); - br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n); - bufptr = decrypted; - } - - if(x >= 0) - { - count += VP8_LOTS_OF_BITS; - loop_end = x; - } - - if (x < 0 || bits_left) - { - while(shift >= loop_end) - { - count += CHAR_BIT; - value |= (VP8_BD_VALUE)*bufptr << shift; - ++bufptr; - ++br->user_buffer; - shift -= CHAR_BIT; - } - } - - br->value = value; - br->count = count; -} diff --git a/thirdparty/libvpx/vp8/decoder/dboolhuff.h b/thirdparty/libvpx/vp8/decoder/dboolhuff.h deleted file mode 100644 index 1b1bbf868e..0000000000 --- a/thirdparty/libvpx/vp8/decoder/dboolhuff.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_DECODER_DBOOLHUFF_H_ -#define VP8_DECODER_DBOOLHUFF_H_ - -#include -#include - -#include "./vpx_config.h" -#include "vpx_ports/mem.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef size_t VP8_BD_VALUE; - -#define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT) - -/*This is meant to be a large, positive constant that can still be efficiently - loaded as an immediate (on platforms like ARM, for example). - Even relatively modest values like 100 would work fine.*/ -#define VP8_LOTS_OF_BITS (0x40000000) - -typedef struct -{ - const unsigned char *user_buffer_end; - const unsigned char *user_buffer; - VP8_BD_VALUE value; - int count; - unsigned int range; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; -} BOOL_DECODER; - -DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); - -int vp8dx_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -void vp8dx_bool_decoder_fill(BOOL_DECODER *br); - - -static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { - unsigned int bit = 0; - VP8_BD_VALUE value; - unsigned int split; - VP8_BD_VALUE bigsplit; - int count; - unsigned int range; - - split = 1 + (((br->range - 1) * probability) >> 8); - - if(br->count < 0) - vp8dx_bool_decoder_fill(br); - - value = br->value; - count = br->count; - - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); - - range = split; - - if (value >= bigsplit) - { - range = br->range - split; - value = value - bigsplit; - bit = 1; - } - - { - register int shift = vp8_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - } - br->value = value; - br->count = count; - br->range = range; - - return bit; -} - -static INLINE int vp8_decode_value(BOOL_DECODER *br, int bits) -{ - int z = 0; - int bit; - - for (bit = bits - 1; bit >= 0; bit--) - { - z |= (vp8dx_decode_bool(br, 0x80) << bit); - } - - return z; -} - -static INLINE int vp8dx_bool_error(BOOL_DECODER *br) -{ - /* Check if we have reached the end of the buffer. - * - * Variable 'count' stores the number of bits in the 'value' buffer, minus - * 8. The top byte is part of the algorithm, and the remainder is buffered - * to be shifted into it. So if count == 8, the top 16 bits of 'value' are - * occupied, 8 for the algorithm and 8 in the buffer. - * - * When reading a byte from the user's buffer, count is filled with 8 and - * one byte is filled into the value buffer. When we reach the end of the - * data, count is additionally filled with VP8_LOTS_OF_BITS. So when - * count == VP8_LOTS_OF_BITS - 1, the user's data has been exhausted. - */ - if ((br->count > VP8_BD_VALUE_SIZE) && (br->count < VP8_LOTS_OF_BITS)) - { - /* We have tried to decode bits after the end of - * stream was encountered. - */ - return 1; - } - - /* No error. */ - return 0; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DBOOLHUFF_H_ diff --git a/thirdparty/libvpx/vp8/decoder/decodeframe.c b/thirdparty/libvpx/vp8/decoder/decodeframe.c deleted file mode 100644 index 51acdbb9c8..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decodeframe.c +++ /dev/null @@ -1,1397 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "onyxd_int.h" -#include "vp8/common/header.h" -#include "vp8/common/reconintra4x4.h" -#include "vp8/common/reconinter.h" -#include "detokenize.h" -#include "vp8/common/common.h" -#include "vp8/common/invtrans.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/quant_common.h" -#include "vpx_scale/vpx_scale.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/setupintrarecon.h" - -#include "decodemv.h" -#include "vp8/common/extend.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" -#include "dboolhuff.h" -#include "vpx_dsp/vpx_dsp_common.h" - -#include -#include - -void vp8cx_init_de_quantizer(VP8D_COMP *pbi) -{ - int Q; - VP8_COMMON *const pc = & pbi->common; - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { - pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); - pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); - - pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q); - pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); - } -} - -void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - int i; - int QIndex; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - VP8_COMMON *const pc = & pbi->common; - - /* Decide whether to use the default or alternate baseline Q value. */ - if (xd->segmentation_enabled) - { - /* Abs Value */ - if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - - /* Delta Value */ - else - QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ - } - else - QIndex = pc->base_qindex; - - /* Set up the macroblock dequant constants */ - xd->dequant_y1_dc[0] = 1; - xd->dequant_y1[0] = pc->Y1dequant[QIndex][0]; - xd->dequant_y2[0] = pc->Y2dequant[QIndex][0]; - xd->dequant_uv[0] = pc->UVdequant[QIndex][0]; - - for (i = 1; i < 16; i++) - { - xd->dequant_y1_dc[i] = - xd->dequant_y1[i] = pc->Y1dequant[QIndex][1]; - xd->dequant_y2[i] = pc->Y2dequant[QIndex][1]; - xd->dequant_uv[i] = pc->UVdequant[QIndex][1]; - } -} - -static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - unsigned int mb_idx) -{ - MB_PREDICTION_MODE mode; - int i; -#if CONFIG_ERROR_CONCEALMENT - int corruption_detected = 0; -#else - (void)mb_idx; -#endif - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else if (!vp8dx_bool_error(xd->current_bc)) - { - int eobtotal; - eobtotal = vp8_decode_mb_tokens(pbi, xd); - - /* Special case: Force the loopfilter to skip when eobtotal is zero */ - xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); - } - - mode = xd->mode_info_context->mbmi.mode; - - if (xd->segmentation_enabled) - vp8_mb_init_dequantizer(pbi, xd); - - -#if CONFIG_ERROR_CONCEALMENT - - if(pbi->ec_active) - { - int throw_residual; - /* When we have independent partitions we can apply residual even - * though other partitions within the frame are corrupt. - */ - throw_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual); - throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); - - if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) - { - /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. - */ - pbi->frame_corrupt_residual = 1; - memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - corruption_detected = 1; - - /* force idct to be skipped for B_PRED and use the - * prediction only for reconstruction - * */ - memset(xd->eobs, 0, 25); - } - } -#endif - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8_build_intra_predictors_mbuv_s(xd, - xd->recon_above[1], - xd->recon_above[2], - xd->recon_left[1], - xd->recon_left[2], - xd->recon_left_stride[1], - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride); - - if (mode != B_PRED) - { - vp8_build_intra_predictors_mby_s(xd, - xd->recon_above[0], - xd->recon_left[0], - xd->recon_left_stride[0], - xd->dst.y_buffer, - xd->dst.y_stride); - } - else - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) - memset(xd->eobs, 0, 25); - - intra_prediction_down_copy(xd, xd->recon_above[0] + 16); - - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; - unsigned char *dst = xd->dst.y_buffer + b->offset; - B_PREDICTION_MODE b_mode = - xd->mode_info_context->bmi[i].as_mode; - unsigned char *Above = dst - dst_stride; - unsigned char *yleft = dst - 1; - int left_stride = dst_stride; - unsigned char top_left = Above[-1]; - - vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, - dst, dst_stride, top_left); - - if (xd->eobs[i]) - { - if (xd->eobs[i] > 1) - { - vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - vp8_dc_only_idct_add - (b->qcoeff[0] * DQC[0], - dst, dst_stride, - dst, dst_stride); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - } - } - } - } - else - { - vp8_build_inter_predictors_mb(xd); - } - - -#if CONFIG_ERROR_CONCEALMENT - if (corruption_detected) - { - return; - } -#endif - - if(!xd->mode_info_context->mbmi.mb_skip_coeff) - { - /* dequantization and idct */ - if (mode != B_PRED) - { - short *DQC = xd->dequant_y1; - - if (mode != SPLITMV) - { - BLOCKD *b = &xd->block[24]; - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_dequantize_b(b, xd->dequant_y2); - - vp8_short_inv_walsh4x4(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); - } - else - { - b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; - vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - - /* override the dc dequant constant in order to preserve the - * dc components - */ - DQC = xd->dequant_y1_dc; - } - - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); - } - - vp8_dequant_idct_add_uv_block - (xd->qcoeff+16*16, xd->dequant_uv, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); - } -} - -static int get_delta_q(vp8_reader *bc, int prev, int *q_update) -{ - int ret_val = 0; - - if (vp8_read_bit(bc)) - { - ret_val = vp8_read_literal(bc, 4); - - if (vp8_read_bit(bc)) - ret_val = -ret_val; - } - - /* Trigger a quantizer update if the delta-q value has changed */ - if (ret_val != prev) - *q_update = 1; - - return ret_val; -} - -#ifdef PACKET_TESTING -#include -FILE *vpxlog = 0; -#endif - -static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1; - unsigned char *dest_ptr1; - - unsigned int Border; - int plane_stride; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - src_ptr1 = ybf->y_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)Border; i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - Border /= 2; - src_ptr1 = ybf->u_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - src_ptr1 = ybf->v_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } -} - -static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = ybf->uv_height; - Border /= 2; - - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } -} - -static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, - unsigned char *y_src, - unsigned char *u_src, - unsigned char *v_src) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = 16; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = y_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = 8; - plane_width = ybf->uv_width; - Border /= 2; - - /* copy the left and right most columns out */ - src_ptr1 = u_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - /* copy the left and right most columns out */ - src_ptr1 = v_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} - -static void decode_mb_rows(VP8D_COMP *pbi) -{ - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - - MODE_INFO *lf_mic = xd->mode_info_context; - - int ibc = 0; - int num_part = 1 << pc->multi_token_partition; - - int recon_yoffset, recon_uvoffset; - int mb_row, mb_col; - int mb_idx = 0; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - int recon_y_stride = yv12_fb_new->y_stride; - int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - unsigned char *lf_dst[3]; - unsigned char *eb_dst[3]; - int i; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - - for(i = 1; i < MAX_REF_FRAMES; i++) - { - YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - - ref_buffer[i][0] = this_fb->y_buffer; - ref_buffer[i][1] = this_fb->u_buffer; - ref_buffer[i][2] = this_fb->v_buffer; - - ref_fb_corrupted[i] = this_fb->corrupted; - } - - /* Set up the buffer pointers */ - eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; - eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; - eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = 0; - - /* Initialize the loop filter for this frame. */ - if(pc->filter_level) - vp8_loop_filter_frame_init(pc, xd, pc->filter_level); - - vp8_setup_intra_recon_top_line(yv12_fb_new); - - /* Decode the individual macro block */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) - { - if (num_part > 1) - { - xd->current_bc = & pbi->mbc[ibc]; - ibc++; - - if (ibc == num_part) - ibc = 0; - } - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - - /* reset contexts */ - xd->above_context = pc->above_context; - memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - - xd->left_available = 0; - - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - xd->recon_above[0] = dst_buffer[0] + recon_yoffset; - xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; - xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; - - xd->recon_left[0] = xd->recon_above[0] - 1; - xd->recon_left[1] = xd->recon_above[1] - 1; - xd->recon_left[2] = xd->recon_above[2] - 1; - - xd->recon_above[0] -= xd->dst.y_stride; - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - - /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - - setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], - xd->recon_left[2], xd->dst.y_stride, - xd->dst.uv_stride); - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - /* Distance of Mb to the various image edges. - * These are specified to 8th pel as they are always compared to values - * that are in 1/8th pel units - */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - -#if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME && - corrupt_residual) - { - /* We have an intra block with corrupt coefficients, better to - * conceal with an inter block. Interpolate MVs from neighboring - * MBs. - * - * Note that for the first mb with corrupt residual in a frame, - * we might not discover that before decoding the residual. That - * happens after this check, and therefore no inter concealment - * will be done. - */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols); - } - } -#endif - - xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; - xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; - xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; - - if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { - const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; - xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; - xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; - xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; - } else { - // ref_frame is INTRA_FRAME, pre buffer should not be used. - xd->pre.y_buffer = 0; - xd->pre.u_buffer = 0; - xd->pre.v_buffer = 0; - } - - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - - decode_macroblock(pbi, xd, mb_idx); - - mb_idx++; - xd->left_available = 1; - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - xd->recon_above[0] += 16; - xd->recon_above[1] += 8; - xd->recon_above[2] += 8; - xd->recon_left[0] += 16; - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - } - - /* adjust to the next row of mbs */ - vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - - if(pc->filter_level) - { - if(mb_row > 0) - { - if (pc->filter_type == NORMAL_LOOPFILTER) - vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, - recon_y_stride, recon_uv_stride, - lf_dst[0], lf_dst[1], lf_dst[2]); - else - vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, - recon_y_stride, recon_uv_stride, - lf_dst[0], lf_dst[1], lf_dst[2]); - if(mb_row > 1) - { - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - - lf_dst[0] += recon_y_stride * 16; - lf_dst[1] += recon_uv_stride * 8; - lf_dst[2] += recon_uv_stride * 8; - lf_mic += pc->mb_cols; - lf_mic++; /* Skip border mb */ - } - } - else - { - if(mb_row > 0) - { - /**/ - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - } - } - - if(pc->filter_level) - { - if (pc->filter_type == NORMAL_LOOPFILTER) - vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); - else - vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); - - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - yv12_extend_frame_top_c(yv12_fb_new); - yv12_extend_frame_bottom_c(yv12_fb_new); - -} - -static unsigned int read_partition_size(VP8D_COMP *pbi, - const unsigned char *cx_size) -{ - unsigned char temp[3]; - if (pbi->decrypt_cb) - { - pbi->decrypt_cb(pbi->decrypt_state, cx_size, temp, 3); - cx_size = temp; - } - return cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16); -} - -static int read_is_valid(const unsigned char *start, - size_t len, - const unsigned char *end) -{ - return (start + len > start && start + len <= end); -} - -static unsigned int read_available_partition_size( - VP8D_COMP *pbi, - const unsigned char *token_part_sizes, - const unsigned char *fragment_start, - const unsigned char *first_fragment_end, - const unsigned char *fragment_end, - int i, - int num_part) -{ - VP8_COMMON* pc = &pbi->common; - const unsigned char *partition_size_ptr = token_part_sizes + i * 3; - unsigned int partition_size = 0; - ptrdiff_t bytes_left = fragment_end - fragment_start; - /* Calculate the length of this partition. The last partition - * size is implicit. If the partition size can't be read, then - * either use the remaining data in the buffer (for EC mode) - * or throw an error. - */ - if (i < num_part - 1) - { - if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) - partition_size = read_partition_size(pbi, partition_size_ptr); - else if (pbi->ec_active) - partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated partition size data"); - } - else - partition_size = (unsigned int)bytes_left; - - /* Validate the calculated partition length. If the buffer - * described by the partition can't be fully read, then restrict - * it to the portion that can be (for EC mode) or throw an error. - */ - if (!read_is_valid(fragment_start, partition_size, fragment_end)) - { - if (pbi->ec_active) - partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " - "%d length", i + 1); - } - return partition_size; -} - - -static void setup_token_decoder(VP8D_COMP *pbi, - const unsigned char* token_part_sizes) -{ - vp8_reader *bool_decoder = &pbi->mbc[0]; - unsigned int partition_idx; - unsigned int fragment_idx; - unsigned int num_token_partitions; - const unsigned char *first_fragment_end = pbi->fragments.ptrs[0] + - pbi->fragments.sizes[0]; - - TOKEN_PARTITION multi_token_partition = - (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); - if (!vp8dx_bool_error(&pbi->mbc[8])) - pbi->common.multi_token_partition = multi_token_partition; - num_token_partitions = 1 << pbi->common.multi_token_partition; - - /* Check for partitions within the fragments and unpack the fragments - * so that each fragment pointer points to its corresponding partition. */ - for (fragment_idx = 0; fragment_idx < pbi->fragments.count; ++fragment_idx) - { - unsigned int fragment_size = pbi->fragments.sizes[fragment_idx]; - const unsigned char *fragment_end = pbi->fragments.ptrs[fragment_idx] + - fragment_size; - /* Special case for handling the first partition since we have already - * read its size. */ - if (fragment_idx == 0) - { - /* Size of first partition + token partition sizes element */ - ptrdiff_t ext_first_part_size = token_part_sizes - - pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1); - fragment_size -= (unsigned int)ext_first_part_size; - if (fragment_size > 0) - { - pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size; - /* The fragment contains an additional partition. Move to - * next. */ - fragment_idx++; - pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[0] + - pbi->fragments.sizes[0]; - } - } - /* Split the chunk into partitions read from the bitstream */ - while (fragment_size > 0) - { - ptrdiff_t partition_size = read_available_partition_size( - pbi, - token_part_sizes, - pbi->fragments.ptrs[fragment_idx], - first_fragment_end, - fragment_end, - fragment_idx - 1, - num_token_partitions); - pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size; - fragment_size -= (unsigned int)partition_size; - assert(fragment_idx <= num_token_partitions); - if (fragment_size > 0) - { - /* The fragment contains an additional partition. - * Move to next. */ - fragment_idx++; - pbi->fragments.ptrs[fragment_idx] = - pbi->fragments.ptrs[fragment_idx - 1] + partition_size; - } - } - } - - pbi->fragments.count = num_token_partitions + 1; - - for (partition_idx = 1; partition_idx < pbi->fragments.count; ++partition_idx) - { - if (vp8dx_start_decode(bool_decoder, - pbi->fragments.ptrs[partition_idx], - pbi->fragments.sizes[partition_idx], - pbi->decrypt_cb, pbi->decrypt_state)) - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder %d", - partition_idx); - - bool_decoder++; - } - -#if CONFIG_MULTITHREAD - /* Clamp number of decoder threads */ - if (pbi->decoding_thread_count > num_token_partitions - 1) - pbi->decoding_thread_count = num_token_partitions - 1; -#endif -} - - -static void init_frame(VP8D_COMP *pbi) -{ - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - - if (pc->frame_type == KEY_FRAME) - { - /* Various keyframe initializations */ - memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - - vp8_init_mbmode_probs(pc); - - vp8_default_coef_probs(pc); - - /* reset the segment feature data to 0 with delta coding (Default state). */ - memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - xd->mb_segement_abs_delta = SEGMENT_DELTADATA; - - /* reset the mode ref deltasa for loop filter */ - memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - - /* All buffers are implicitly updated on key frames. */ - pc->refresh_golden_frame = 1; - pc->refresh_alt_ref_frame = 1; - pc->copy_buffer_to_gf = 0; - pc->copy_buffer_to_arf = 0; - - /* Note that Golden and Altref modes cannot be used on a key frame so - * ref_frame_sign_bias[] is undefined and meaningless - */ - pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; - pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; - } - else - { - /* To enable choice of different interploation filters */ - if (!pc->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; - xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; - xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; - } - else - { - xd->subpixel_predict = vp8_bilinear_predict4x4; - xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; - xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; - xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; - } - - if (pbi->decoded_key_frame && pbi->ec_enabled && !pbi->ec_active) - pbi->ec_active = 1; - } - - xd->left_context = &pc->left_context; - xd->mode_info_context = pc->mi; - xd->frame_type = pc->frame_type; - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_stride = pc->mode_info_stride; - xd->corrupted = 0; /* init without corruption */ - - xd->fullpixel_mask = 0xffffffff; - if(pc->full_pixel) - xd->fullpixel_mask = 0xfffffff8; - -} - -int vp8_decode_frame(VP8D_COMP *pbi) -{ - vp8_reader *const bc = &pbi->mbc[8]; - VP8_COMMON *const pc = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const unsigned char *data = pbi->fragments.ptrs[0]; - const unsigned int data_sz = pbi->fragments.sizes[0]; - const unsigned char *data_end = data + data_sz; - ptrdiff_t first_partition_length_in_bytes; - - int i, j, k, l; - const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; - int corrupt_tokens = 0; - int prev_independent_partitions = pbi->independent_partitions; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - /* start with no corruption of current frame */ - xd->corrupted = 0; - yv12_fb_new->corrupted = 0; - - if (data_end - data < 3) - { - if (!pbi->ec_active) - { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet"); - } - - /* Declare the missing frame as an inter frame since it will - be handled as an inter frame when we have estimated its - motion vectors. */ - pc->frame_type = INTER_FRAME; - pc->version = 0; - pc->show_frame = 1; - first_partition_length_in_bytes = 0; - } - else - { - unsigned char clear_buffer[10]; - const unsigned char *clear = data; - if (pbi->decrypt_cb) - { - int n = (int)VPXMIN(sizeof(clear_buffer), data_sz); - pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); - clear = clear_buffer; - } - - pc->frame_type = (FRAME_TYPE)(clear[0] & 1); - pc->version = (clear[0] >> 1) & 7; - pc->show_frame = (clear[0] >> 4) & 1; - first_partition_length_in_bytes = - (clear[0] | (clear[1] << 8) | (clear[2] << 16)) >> 5; - - if (!pbi->ec_active && - (data + first_partition_length_in_bytes > data_end - || data + first_partition_length_in_bytes < data)) - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition 0 length"); - - data += 3; - clear += 3; - - vp8_setup_version(pc); - - - if (pc->frame_type == KEY_FRAME) - { - /* vet via sync code */ - /* When error concealment is enabled we should only check the sync - * code if we have enough bits available - */ - if (!pbi->ec_active || data + 3 < data_end) - { - if (clear[0] != 0x9d || clear[1] != 0x01 || clear[2] != 0x2a) - vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - } - - /* If error concealment is enabled we should only parse the new size - * if we have enough data. Otherwise we will end up with the wrong - * size. - */ - if (!pbi->ec_active || data + 6 < data_end) - { - pc->Width = (clear[3] | (clear[4] << 8)) & 0x3fff; - pc->horiz_scale = clear[4] >> 6; - pc->Height = (clear[5] | (clear[6] << 8)) & 0x3fff; - pc->vert_scale = clear[6] >> 6; - } - data += 7; - } - else - { - memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - } - } - if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) - { - return -1; - } - - init_frame(pbi); - - if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data), - pbi->decrypt_cb, pbi->decrypt_state)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - if (pc->frame_type == KEY_FRAME) { - (void)vp8_read_bit(bc); // colorspace - pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc); - } - - /* Is segmentation enabled */ - xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); - - if (xd->segmentation_enabled) - { - /* Signal whether or not the segmentation map is being explicitly updated this frame. */ - xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc); - xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc); - - if (xd->update_mb_segmentation_data) - { - xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); - - memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - - /* For each segmentation feature (Quant and loop filter level) */ - for (i = 0; i < MB_LVL_MAX; i++) - { - for (j = 0; j < MAX_MB_SEGMENTS; j++) - { - /* Frame level data */ - if (vp8_read_bit(bc)) - { - xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); - - if (vp8_read_bit(bc)) - xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j]; - } - else - xd->segment_feature_data[i][j] = 0; - } - } - } - - if (xd->update_mb_segmentation_map) - { - /* Which macro block level features are enabled */ - memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); - - /* Read the probs used to decode the segment id for each macro block. */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) - { - /* If not explicitly set value is defaulted to 255 by memset above */ - if (vp8_read_bit(bc)) - xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8); - } - } - } - else - { - /* No segmentation updates on this frame */ - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - } - - /* Read the loop filter level and type */ - pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc); - pc->filter_level = vp8_read_literal(bc, 6); - pc->sharpness_level = vp8_read_literal(bc, 3); - - /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */ - xd->mode_ref_lf_delta_update = 0; - xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc); - - if (xd->mode_ref_lf_delta_enabled) - { - /* Do the deltas need to be updated */ - xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc); - - if (xd->mode_ref_lf_delta_update) - { - /* Send update */ - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - { - if (vp8_read_bit(bc)) - { - /*sign = vp8_read_bit( bc );*/ - xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - - if (vp8_read_bit(bc)) /* Apply sign */ - xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1; - } - } - - /* Send update */ - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - { - if (vp8_read_bit(bc)) - { - /*sign = vp8_read_bit( bc );*/ - xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - - if (vp8_read_bit(bc)) /* Apply sign */ - xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1; - } - } - } - } - - setup_token_decoder(pbi, data + first_partition_length_in_bytes); - - xd->current_bc = &pbi->mbc[0]; - - /* Read the default quantizers. */ - { - int Q, q_update; - - Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */ - pc->base_qindex = Q; - q_update = 0; - pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update); - pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update); - pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update); - pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update); - pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update); - - if (q_update) - vp8cx_init_de_quantizer(pbi); - - /* MB level dequantizer setup */ - vp8_mb_init_dequantizer(pbi, &pbi->mb); - } - - /* Determine if the golden frame or ARF buffer should be updated and how. - * For all non key frames the GF and ARF refresh flags and sign bias - * flags must be set explicitly. - */ - if (pc->frame_type != KEY_FRAME) - { - /* Should the GF or ARF be updated from the current frame */ - pc->refresh_golden_frame = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh golden if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_golden_frame = 0; -#endif - - pc->refresh_alt_ref_frame = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh altref if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_alt_ref_frame = 0; -#endif - - /* Buffer to buffer copy flags. */ - pc->copy_buffer_to_gf = 0; - - if (!pc->refresh_golden_frame) - pc->copy_buffer_to_gf = vp8_read_literal(bc, 2); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't copy to the golden if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->copy_buffer_to_gf = 0; -#endif - - pc->copy_buffer_to_arf = 0; - - if (!pc->refresh_alt_ref_frame) - pc->copy_buffer_to_arf = vp8_read_literal(bc, 2); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't copy to the alt-ref if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->copy_buffer_to_arf = 0; -#endif - - - pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc); - pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc); - } - - pc->refresh_entropy_probs = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh the probabilities if the bit is - * missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_entropy_probs = 0; -#endif - if (pc->refresh_entropy_probs == 0) - { - memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); - } - - pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we should refresh the last frame if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_last_frame = 1; -#endif - - if (0) - { - FILE *z = fopen("decodestats.stt", "a"); - fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", - pc->current_video_frame, - pc->frame_type, - pc->refresh_golden_frame, - pc->refresh_alt_ref_frame, - pc->refresh_last_frame, - pc->base_qindex); - fclose(z); - } - - { - pbi->independent_partitions = 1; - - /* read coef probability tree */ - for (i = 0; i < BLOCK_TYPES; i++) - for (j = 0; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) - for (l = 0; l < ENTROPY_NODES; l++) - { - - vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l; - - if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l])) - { - *p = (vp8_prob)vp8_read_literal(bc, 8); - - } - if (k > 0 && *p != pc->fc.coef_probs[i][j][k-1][l]) - pbi->independent_partitions = 0; - - } - } - - /* clear out the coeff buffer */ - memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - vp8_decode_mode_mvs(pbi); - -#if CONFIG_ERROR_CONCEALMENT - if (pbi->ec_active && - pbi->mvs_corrupt_from_mb < (unsigned int)pc->mb_cols * pc->mb_rows) - { - /* Motion vectors are missing in this frame. We will try to estimate - * them and then continue decoding the frame as usual */ - vp8_estimate_missing_mvs(pbi); - } -#endif - - memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); - pbi->frame_corrupt_residual = 0; - -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) - { - unsigned int thread; - vp8mt_decode_mb_rows(pbi, xd); - vp8_yv12_extend_frame_borders(yv12_fb_new); - for (thread = 0; thread < pbi->decoding_thread_count; ++thread) - corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; - } - else -#endif - { - decode_mb_rows(pbi); - corrupt_tokens |= xd->corrupted; - } - - /* Collect information about decoder corruption. */ - /* 1. Check first boolean decoder for errors. */ - yv12_fb_new->corrupted = vp8dx_bool_error(bc); - /* 2. Check the macroblock information */ - yv12_fb_new->corrupted |= corrupt_tokens; - - if (!pbi->decoded_key_frame) - { - if (pc->frame_type == KEY_FRAME && - !yv12_fb_new->corrupted) - pbi->decoded_key_frame = 1; - else - vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, - "A stream must start with a complete key frame"); - } - - /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - - if (pc->refresh_entropy_probs == 0) - { - memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); - pbi->independent_partitions = prev_independent_partitions; - } - -#ifdef PACKET_TESTING - { - FILE *f = fopen("decompressor.VP8", "ab"); - unsigned int size = pbi->bc2.pos + pbi->bc.pos + 8; - fwrite((void *) &size, 4, 1, f); - fwrite((void *) pbi->Source, size, 1, f); - fclose(f); - } -#endif - - return 0; -} diff --git a/thirdparty/libvpx/vp8/decoder/decodemv.c b/thirdparty/libvpx/vp8/decoder/decodemv.c deleted file mode 100644 index 1d155e7e16..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decodemv.c +++ /dev/null @@ -1,670 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "treereader.h" -#include "vp8/common/entropymv.h" -#include "vp8/common/entropymode.h" -#include "onyxd_int.h" -#include "vp8/common/findnearmv.h" - -#if CONFIG_DEBUG -#include -#endif -static B_PREDICTION_MODE read_bmode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_bmode_tree, p); - - return (B_PREDICTION_MODE)i; -} - -static MB_PREDICTION_MODE read_ymode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_ymode_tree, p); - - return (MB_PREDICTION_MODE)i; -} - -static MB_PREDICTION_MODE read_kf_ymode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); - - return (MB_PREDICTION_MODE)i; -} - -static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); - - return (MB_PREDICTION_MODE)i; -} - -static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) -{ - vp8_reader *const bc = & pbi->mbc[8]; - const int mis = pbi->common.mode_info_stride; - - mi->mbmi.ref_frame = INTRA_FRAME; - mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); - - if (mi->mbmi.mode == B_PRED) - { - int i = 0; - mi->mbmi.is_4x4 = 1; - - do - { - const B_PREDICTION_MODE A = above_block_mode(mi, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mi, i); - - mi->bmi[i].as_mode = - read_bmode(bc, vp8_kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - - mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); -} - -static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) -{ - const vp8_prob *const p = (const vp8_prob *) mvc; - int x = 0; - - if (vp8_read(r, p [mvpis_short])) /* Large */ - { - int i = 0; - - do - { - x += vp8_read(r, p [MVPbits + i]) << i; - } - while (++i < 3); - - i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ - - do - { - x += vp8_read(r, p [MVPbits + i]) << i; - } - while (--i > 3); - - if (!(x & 0xFFF0) || vp8_read(r, p [MVPbits + 3])) - x += 8; - } - else /* small */ - x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort); - - if (x && vp8_read(r, p [MVPsign])) - x = -x; - - return x; -} - -static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) -{ - mv->row = (short)(read_mvcomponent(r, mvc) * 2); - mv->col = (short)(read_mvcomponent(r, ++mvc) * 2); -} - - -static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) -{ - int i = 0; - - do - { - const vp8_prob *up = vp8_mv_update_probs[i].prob; - vp8_prob *p = (vp8_prob *)(mvc + i); - vp8_prob *const pstop = p + MVPcount; - - do - { - if (vp8_read(bc, *up++)) - { - const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7); - - *p = x ? x << 1 : 1; - } - } - while (++p < pstop); - } - while (++i < 2); -} - -static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1}; -static const unsigned char mbsplit_fill_offset[4][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15}, - { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} -}; - - -static void mb_mode_mv_init(VP8D_COMP *pbi) -{ - vp8_reader *const bc = & pbi->mbc[8]; - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - -#if CONFIG_ERROR_CONCEALMENT - /* Default is that no macroblock is corrupt, therefore we initialize - * mvs_corrupt_from_mb to something very big, which we can be sure is - * outside the frame. */ - pbi->mvs_corrupt_from_mb = UINT_MAX; -#endif - /* Read the mb_no_coeff_skip flag */ - pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); - - pbi->prob_skip_false = 0; - if (pbi->common.mb_no_coeff_skip) - pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); - - if(pbi->common.frame_type != KEY_FRAME) - { - pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8); - pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8); - pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8); - - if (vp8_read_bit(bc)) - { - int i = 0; - - do - { - pbi->common.fc.ymode_prob[i] = - (vp8_prob) vp8_read_literal(bc, 8); - } - while (++i < 4); - } - - if (vp8_read_bit(bc)) - { - int i = 0; - - do - { - pbi->common.fc.uv_mode_prob[i] = - (vp8_prob) vp8_read_literal(bc, 8); - } - while (++i < 3); - } - - read_mvcontexts(bc, mvc); - } -} - -const vp8_prob vp8_sub_mv_ref_prob3 [8][VP8_SUBMVREFS-1] = -{ - { 147, 136, 18 }, /* SUBMVREF_NORMAL */ - { 223, 1 , 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ - { 106, 145, 1 }, /* SUBMVREF_LEFT_ZED */ - { 208, 1 , 1 }, /* SUBMVREF_LEFT_ABOVE_ZED */ - { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ - { 223, 1 , 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ - { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ - { 208, 1 , 1 } /* SUBMVREF_LEFT_ABOVE_ZED */ -}; - -static -const vp8_prob * get_sub_mv_ref_prob(const int left, const int above) -{ - int lez = (left == 0); - int aez = (above == 0); - int lea = (left == above); - const vp8_prob * prob; - - prob = vp8_sub_mv_ref_prob3[(aez << 2) | - (lez << 1) | - (lea)]; - - return prob; -} - -static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, - const MODE_INFO *left_mb, const MODE_INFO *above_mb, - MB_MODE_INFO *mbmi, int_mv best_mv, - MV_CONTEXT *const mvc, int mb_to_left_edge, - int mb_to_right_edge, int mb_to_top_edge, - int mb_to_bottom_edge) -{ - int s; /* split configuration (16x8, 8x16, 8x8, 4x4) */ - int num_p; /* number of partitions in the split configuration - (see vp8_mbsplit_count) */ - int j = 0; - - s = 3; - num_p = 16; - if( vp8_read(bc, 110) ) - { - s = 2; - num_p = 4; - if( vp8_read(bc, 111) ) - { - s = vp8_read(bc, 150); - num_p = 2; - } - } - - do /* for each subset j */ - { - int_mv leftmv, abovemv; - int_mv blockmv; - int k; /* first block in subset j */ - - const vp8_prob *prob; - k = vp8_mbsplit_offset[s][j]; - - if (!(k & 3)) - { - /* On L edge, get from MB to left of us */ - if(left_mb->mbmi.mode != SPLITMV) - leftmv.as_int = left_mb->mbmi.mv.as_int; - else - leftmv.as_int = (left_mb->bmi + k + 4 - 1)->mv.as_int; - } - else - leftmv.as_int = (mi->bmi + k - 1)->mv.as_int; - - if (!(k >> 2)) - { - /* On top edge, get from MB above us */ - if(above_mb->mbmi.mode != SPLITMV) - abovemv.as_int = above_mb->mbmi.mv.as_int; - else - abovemv.as_int = (above_mb->bmi + k + 16 - 4)->mv.as_int; - } - else - abovemv.as_int = (mi->bmi + k - 4)->mv.as_int; - - prob = get_sub_mv_ref_prob(leftmv.as_int, abovemv.as_int); - - if( vp8_read(bc, prob[0]) ) - { - if( vp8_read(bc, prob[1]) ) - { - blockmv.as_int = 0; - if( vp8_read(bc, prob[2]) ) - { - blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2; - blockmv.as_mv.row += best_mv.as_mv.row; - blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2; - blockmv.as_mv.col += best_mv.as_mv.col; - } - } - else - { - blockmv.as_int = abovemv.as_int; - } - } - else - { - blockmv.as_int = leftmv.as_int; - } - - mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, - mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - - { - /* Fill (uniform) modes, mvs of jth subset. - Must do it here because ensuing subsets can - refer back to us via "left" or "above". */ - const unsigned char *fill_offset; - unsigned int fill_count = mbsplit_fill_count[s]; - - fill_offset = &mbsplit_fill_offset[s] - [(unsigned char)j * mbsplit_fill_count[s]]; - - do { - mi->bmi[ *fill_offset].mv.as_int = blockmv.as_int; - fill_offset++; - }while (--fill_count); - } - - } - while (++j < num_p); - - mbmi->partitioning = s; -} - -static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) -{ - vp8_reader *const bc = & pbi->mbc[8]; - mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); - if (mbmi->ref_frame) /* inter MB */ - { - enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV}; - int cnt[4]; - int *cntx = cnt; - int_mv near_mvs[4]; - int_mv *nmv = near_mvs; - const int mis = pbi->mb.mode_info_stride; - const MODE_INFO *above = mi - mis; - const MODE_INFO *left = mi - 1; - const MODE_INFO *aboveleft = above - 1; - int *ref_frame_sign_bias = pbi->common.ref_frame_sign_bias; - - mbmi->need_to_clamp_mvs = 0; - - if (vp8_read(bc, pbi->prob_last)) - { - mbmi->ref_frame = - (MV_REFERENCE_FRAME)((int)(2 + vp8_read(bc, pbi->prob_gf))); - } - - /* Zero accumulators */ - nmv[0].as_int = nmv[1].as_int = nmv[2].as_int = 0; - cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; - - /* Process above */ - if (above->mbmi.ref_frame != INTRA_FRAME) - { - if (above->mbmi.mv.as_int) - { - (++nmv)->as_int = above->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], - mbmi->ref_frame, nmv, ref_frame_sign_bias); - ++cntx; - } - - *cntx += 2; - } - - /* Process left */ - if (left->mbmi.ref_frame != INTRA_FRAME) - { - if (left->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = left->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], - mbmi->ref_frame, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != nmv->as_int) - { - (++nmv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 2; - } - else - cnt[CNT_INTRA] += 2; - } - - /* Process above left */ - if (aboveleft->mbmi.ref_frame != INTRA_FRAME) - { - if (aboveleft->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = aboveleft->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], - mbmi->ref_frame, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != nmv->as_int) - { - (++nmv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 1; - } - else - cnt[CNT_INTRA] += 1; - } - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_INTRA]] [0]) ) - { - - /* If we have three distinct MV's ... */ - /* See if above-left MV can be merged with NEAREST */ - cnt[CNT_NEAREST] += ( (cnt[CNT_SPLITMV] > 0) & - (nmv->as_int == near_mvs[CNT_NEAREST].as_int)); - - /* Swap near and nearest if necessary */ - if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) - { - int tmp; - tmp = cnt[CNT_NEAREST]; - cnt[CNT_NEAREST] = cnt[CNT_NEAR]; - cnt[CNT_NEAR] = tmp; - tmp = near_mvs[CNT_NEAREST].as_int; - near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; - near_mvs[CNT_NEAR].as_int = tmp; - } - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_NEAREST]] [1]) ) - { - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_NEAR]] [2]) ) - { - int mb_to_top_edge; - int mb_to_bottom_edge; - int mb_to_left_edge; - int mb_to_right_edge; - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - int near_index; - - mb_to_top_edge = pbi->mb.mb_to_top_edge; - mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge; - mb_to_top_edge -= LEFT_TOP_MARGIN; - mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; - mb_to_right_edge = pbi->mb.mb_to_right_edge; - mb_to_right_edge += RIGHT_BOTTOM_MARGIN; - mb_to_left_edge = pbi->mb.mb_to_left_edge; - mb_to_left_edge -= LEFT_TOP_MARGIN; - - /* Use near_mvs[0] to store the "best" MV */ - near_index = CNT_INTRA + - (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]); - - vp8_clamp_mv2(&near_mvs[near_index], &pbi->mb); - - cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) - + (left->mbmi.mode == SPLITMV)) * 2 - + (aboveleft->mbmi.mode == SPLITMV); - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_SPLITMV]] [3]) ) - { - decode_split_mv(bc, mi, left, above, - mbmi, - near_mvs[near_index], - mvc, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - mbmi->mv.as_int = mi->bmi[15].mv.as_int; - mbmi->mode = SPLITMV; - mbmi->is_4x4 = 1; - } - else - { - int_mv *const mbmi_mv = & mbmi->mv; - read_mv(bc, &mbmi_mv->as_mv, (const MV_CONTEXT *) mvc); - mbmi_mv->as_mv.row += near_mvs[near_index].as_mv.row; - mbmi_mv->as_mv.col += near_mvs[near_index].as_mv.col; - - /* Don't need to check this on NEARMV and NEARESTMV - * modes since those modes clamp the MV. The NEWMV mode - * does not, so signal to the prediction stage whether - * special handling may be required. - */ - mbmi->need_to_clamp_mvs = - vp8_check_mv_bounds(mbmi_mv, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - mbmi->mode = NEWMV; - } - } - else - { - mbmi->mode = NEARMV; - mbmi->mv.as_int = near_mvs[CNT_NEAR].as_int; - vp8_clamp_mv2(&mbmi->mv, &pbi->mb); - } - } - else - { - mbmi->mode = NEARESTMV; - mbmi->mv.as_int = near_mvs[CNT_NEAREST].as_int; - vp8_clamp_mv2(&mbmi->mv, &pbi->mb); - } - } - else - { - mbmi->mode = ZEROMV; - mbmi->mv.as_int = 0; - } - -#if CONFIG_ERROR_CONCEALMENT - if(pbi->ec_enabled && (mbmi->mode != SPLITMV)) - { - mi->bmi[ 0].mv.as_int = - mi->bmi[ 1].mv.as_int = - mi->bmi[ 2].mv.as_int = - mi->bmi[ 3].mv.as_int = - mi->bmi[ 4].mv.as_int = - mi->bmi[ 5].mv.as_int = - mi->bmi[ 6].mv.as_int = - mi->bmi[ 7].mv.as_int = - mi->bmi[ 8].mv.as_int = - mi->bmi[ 9].mv.as_int = - mi->bmi[10].mv.as_int = - mi->bmi[11].mv.as_int = - mi->bmi[12].mv.as_int = - mi->bmi[13].mv.as_int = - mi->bmi[14].mv.as_int = - mi->bmi[15].mv.as_int = mbmi->mv.as_int; - } -#endif - } - else - { - /* required for left and above block mv */ - mbmi->mv.as_int = 0; - - /* MB is intra coded */ - if ((mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED) - { - int j = 0; - mbmi->is_4x4 = 1; - do - { - mi->bmi[j].as_mode = read_bmode(bc, pbi->common.fc.bmode_prob); - } - while (++j < 16); - } - - mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob); - } - -} - -static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) -{ - /* Is segmentation enabled */ - if (x->segmentation_enabled && x->update_mb_segmentation_map) - { - /* If so then read the segment id. */ - if (vp8_read(r, x->mb_segment_tree_probs[0])) - mi->segment_id = - (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2])); - else - mi->segment_id = - (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); - } -} - -static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, - MB_MODE_INFO *mbmi) -{ - (void)mbmi; - - /* Read the Macroblock segmentation map if it is being updated explicitly - * this frame (reset to 0 above by default) - * By default on a key frame reset all MBs to segment 0 - */ - if (pbi->mb.update_mb_segmentation_map) - read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); - else if(pbi->common.frame_type == KEY_FRAME) - mi->mbmi.segment_id = 0; - - /* Read the macroblock coeff skip flag if this feature is in use, - * else default to 0 */ - if (pbi->common.mb_no_coeff_skip) - mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); - else - mi->mbmi.mb_skip_coeff = 0; - - mi->mbmi.is_4x4 = 0; - if(pbi->common.frame_type == KEY_FRAME) - read_kf_modes(pbi, mi); - else - read_mb_modes_mv(pbi, mi, &mi->mbmi); - -} - -void vp8_decode_mode_mvs(VP8D_COMP *pbi) -{ - MODE_INFO *mi = pbi->common.mi; - int mb_row = -1; - int mb_to_right_edge_start; - - mb_mode_mv_init(pbi); - - pbi->mb.mb_to_top_edge = 0; - pbi->mb.mb_to_bottom_edge = ((pbi->common.mb_rows - 1) * 16) << 3; - mb_to_right_edge_start = ((pbi->common.mb_cols - 1) * 16) << 3; - - while (++mb_row < pbi->common.mb_rows) - { - int mb_col = -1; - - pbi->mb.mb_to_left_edge = 0; - pbi->mb.mb_to_right_edge = mb_to_right_edge_start; - - while (++mb_col < pbi->common.mb_cols) - { -#if CONFIG_ERROR_CONCEALMENT - int mb_num = mb_row * pbi->common.mb_cols + mb_col; -#endif - - decode_mb_mode_mvs(pbi, mi, &mi->mbmi); - -#if CONFIG_ERROR_CONCEALMENT - /* look for corruption. set mvs_corrupt_from_mb to the current - * mb_num if the frame is corrupt from this macroblock. */ - if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < - (int)pbi->mvs_corrupt_from_mb) - { - pbi->mvs_corrupt_from_mb = mb_num; - /* no need to continue since the partition is corrupt from - * here on. - */ - return; - } -#endif - - pbi->mb.mb_to_left_edge -= (16 << 3); - pbi->mb.mb_to_right_edge -= (16 << 3); - mi++; /* next macroblock */ - } - pbi->mb.mb_to_top_edge -= (16 << 3); - pbi->mb.mb_to_bottom_edge -= (16 << 3); - - mi++; /* skip left predictor each row */ - } -} diff --git a/thirdparty/libvpx/vp8/decoder/decodemv.h b/thirdparty/libvpx/vp8/decoder/decodemv.h deleted file mode 100644 index f33b07351d..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decodemv.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_DECODER_DECODEMV_H_ -#define VP8_DECODER_DECODEMV_H_ - -#include "onyxd_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_decode_mode_mvs(VP8D_COMP *); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp8/decoder/decoderthreading.h b/thirdparty/libvpx/vp8/decoder/decoderthreading.h deleted file mode 100644 index c563cf6e93..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decoderthreading.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_DECODER_DECODERTHREADING_H_ -#define VP8_DECODER_DECODERTHREADING_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_MULTITHREAD -void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); -void vp8_decoder_remove_threads(VP8D_COMP *pbi); -void vp8_decoder_create_threads(VP8D_COMP *pbi); -void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); -void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DECODERTHREADING_H_ diff --git a/thirdparty/libvpx/vp8/decoder/detokenize.c b/thirdparty/libvpx/vp8/decoder/detokenize.c deleted file mode 100644 index fcc7533c50..0000000000 --- a/thirdparty/libvpx/vp8/decoder/detokenize.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/blockd.h" -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "detokenize.h" - -void vp8_reset_mb_tokens_context(MACROBLOCKD *x) -{ - ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); - ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); - - memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); - memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); - - /* Clear entropy contexts for Y2 blocks */ - if (!x->mode_info_context->mbmi.is_4x4) - { - a_ctx[8] = l_ctx[8] = 0; - } -} - -/* - ------------------------------------------------------------------------------ - Residual decoding (Paragraph 13.2 / 13.3) -*/ -static const uint8_t kBands[16 + 1] = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, - 0 /* extra entry as sentinel */ -}; - -static const uint8_t kCat3[] = { 173, 148, 140, 0 }; -static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; -static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; -static const uint8_t kCat6[] = - { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; -static const uint8_t kZigzag[16] = { - 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 -}; - -#define VP8GetBit vp8dx_decode_bool -#define NUM_PROBAS 11 -#define NUM_CTX 3 - -/* for const-casting */ -typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; - -static int GetSigned(BOOL_DECODER *br, int value_to_sign) -{ - int split = (br->range + 1) >> 1; - VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); - int v; - - if(br->count < 0) - vp8dx_bool_decoder_fill(br); - - if ( br->value < bigsplit ) - { - br->range = split; - v= value_to_sign; - } - else - { - br->range = br->range-split; - br->value = br->value-bigsplit; - v = -value_to_sign; - } - br->range +=br->range; - br->value +=br->value; - br->count--; - - return v; -} -/* - Returns the position of the last non-zero coeff plus one - (and 0 if there's no coeff at all) -*/ -static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob, - int ctx, int n, int16_t* out) -{ - const uint8_t* p = prob[n][ctx]; - if (!VP8GetBit(br, p[0])) - { /* first EOB is more a 'CBP' bit. */ - return 0; - } - while (1) - { - ++n; - if (!VP8GetBit(br, p[1])) - { - p = prob[kBands[n]][0]; - } - else - { /* non zero coeff */ - int v, j; - if (!VP8GetBit(br, p[2])) - { - p = prob[kBands[n]][1]; - v = 1; - } - else - { - if (!VP8GetBit(br, p[3])) - { - if (!VP8GetBit(br, p[4])) - { - v = 2; - } - else - { - v = 3 + VP8GetBit(br, p[5]); - } - } - else - { - if (!VP8GetBit(br, p[6])) - { - if (!VP8GetBit(br, p[7])) - { - v = 5 + VP8GetBit(br, 159); - } else - { - v = 7 + 2 * VP8GetBit(br, 165); - v += VP8GetBit(br, 145); - } - } - else - { - const uint8_t* tab; - const int bit1 = VP8GetBit(br, p[8]); - const int bit0 = VP8GetBit(br, p[9 + bit1]); - const int cat = 2 * bit1 + bit0; - v = 0; - for (tab = kCat3456[cat]; *tab; ++tab) - { - v += v + VP8GetBit(br, *tab); - } - v += 3 + (8 << cat); - } - } - p = prob[kBands[n]][2]; - } - j = kZigzag[n - 1]; - - out[j] = GetSigned(br, v); - - if (n == 16 || !VP8GetBit(br, p[0])) - { /* EOB */ - return n; - } - } - if (n == 16) - { - return 16; - } - } -} - -int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) -{ - BOOL_DECODER *bc = x->current_bc; - const FRAME_CONTEXT * const fc = &dx->common.fc; - char *eobs = x->eobs; - - int i; - int nonzeros; - int eobtotal = 0; - - short *qcoeff_ptr; - ProbaArray coef_probs; - ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); - ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int skip_dc = 0; - - qcoeff_ptr = &x->qcoeff[0]; - - if (!x->mode_info_context->mbmi.is_4x4) - { - a = a_ctx + 8; - l = l_ctx + 8; - - coef_probs = fc->coef_probs [1]; - - nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16); - *a = *l = (nonzeros > 0); - - eobs[24] = nonzeros; - eobtotal += nonzeros - 16; - - coef_probs = fc->coef_probs [0]; - skip_dc = 1; - } - else - { - coef_probs = fc->coef_probs [3]; - skip_dc = 0; - } - - for (i = 0; i < 16; ++i) - { - a = a_ctx + (i&3); - l = l_ctx + ((i&0xc)>>2); - - nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr); - *a = *l = (nonzeros > 0); - - nonzeros += skip_dc; - eobs[i] = nonzeros; - eobtotal += nonzeros; - qcoeff_ptr += 16; - } - - coef_probs = fc->coef_probs [2]; - - a_ctx += 4; - l_ctx += 4; - for (i = 16; i < 24; ++i) - { - a = a_ctx + ((i > 19)<<1) + (i&1); - l = l_ctx + ((i > 19)<<1) + ((i&3)>1); - - nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr); - *a = *l = (nonzeros > 0); - - eobs[i] = nonzeros; - eobtotal += nonzeros; - qcoeff_ptr += 16; - } - - return eobtotal; -} - diff --git a/thirdparty/libvpx/vp8/decoder/detokenize.h b/thirdparty/libvpx/vp8/decoder/detokenize.h deleted file mode 100644 index f0b125444f..0000000000 --- a/thirdparty/libvpx/vp8/decoder/detokenize.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_DECODER_DETOKENIZE_H_ -#define VP8_DECODER_DETOKENIZE_H_ - -#include "onyxd_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_reset_mb_tokens_context(MACROBLOCKD *x); -int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DETOKENIZE_H_ diff --git a/thirdparty/libvpx/vp8/decoder/onyxd_if.c b/thirdparty/libvpx/vp8/decoder/onyxd_if.c deleted file mode 100644 index 3468268a2a..0000000000 --- a/thirdparty/libvpx/vp8/decoder/onyxd_if.c +++ /dev/null @@ -1,521 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/onyxc_int.h" -#if CONFIG_POSTPROC -#include "vp8/common/postproc.h" -#endif -#include "vp8/common/onyxd.h" -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/loopfilter.h" -#include "vp8/common/swapyv12buffer.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" -#include -#include - -#include "vp8/common/quant_common.h" -#include "vp8/common/reconintra.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "vpx_scale/vpx_scale.h" -#include "vp8/common/systemdependent.h" -#include "vpx_ports/vpx_once.h" -#include "vpx_ports/vpx_timer.h" -#include "detokenize.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif - -extern void vp8_init_loop_filter(VP8_COMMON *cm); -extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); -static int get_free_fb (VP8_COMMON *cm); -static void ref_cnt_fb (int *buf, int *idx, int new_idx); - -static void initialize_dec(void) { - static volatile int init_done = 0; - - if (!init_done) - { - vpx_dsp_rtcd(); - vp8_init_intra_predictors(); - init_done = 1; - } -} - -static void remove_decompressor(VP8D_COMP *pbi) -{ -#if CONFIG_ERROR_CONCEALMENT - vp8_de_alloc_overlap_lists(pbi); -#endif - vp8_remove_common(&pbi->common); - vpx_free(pbi); -} - -static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf) -{ - VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP)); - - if (!pbi) - return NULL; - - memset(pbi, 0, sizeof(VP8D_COMP)); - - if (setjmp(pbi->common.error.jmp)) - { - pbi->common.error.setjmp = 0; - remove_decompressor(pbi); - return 0; - } - - pbi->common.error.setjmp = 1; - - vp8_create_common(&pbi->common); - - pbi->common.current_video_frame = 0; - pbi->ready_for_new_data = 1; - - /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid - * unnecessary calling of vp8cx_init_de_quantizer() for every frame. - */ - vp8cx_init_de_quantizer(pbi); - - vp8_loop_filter_init(&pbi->common); - - pbi->common.error.setjmp = 0; - -#if CONFIG_ERROR_CONCEALMENT - pbi->ec_enabled = oxcf->error_concealment; - pbi->overlaps = NULL; -#else - (void)oxcf; - pbi->ec_enabled = 0; -#endif - /* Error concealment is activated after a key frame has been - * decoded without errors when error concealment is enabled. - */ - pbi->ec_active = 0; - - pbi->decoded_key_frame = 0; - - /* Independent partitions is activated when a frame updates the - * token probability table to have equal probabilities over the - * PREV_COEF context. - */ - pbi->independent_partitions = 0; - - vp8_setup_block_dptrs(&pbi->mb); - - once(initialize_dec); - - return pbi; -} - -vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) -{ - VP8_COMMON *cm = &pbi->common; - int ref_fb_idx; - - if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return pbi->common.error.error_code; - } - - if(cm->yv12_fb[ref_fb_idx].y_height != sd->y_height || - cm->yv12_fb[ref_fb_idx].y_width != sd->y_width || - cm->yv12_fb[ref_fb_idx].uv_height != sd->uv_height || - cm->yv12_fb[ref_fb_idx].uv_width != sd->uv_width){ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } - else - vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); - - return pbi->common.error.error_code; -} - - -vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) -{ - VP8_COMMON *cm = &pbi->common; - int *ref_fb_ptr = NULL; - int free_fb; - - if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_ptr = &cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_ptr = &cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_ptr = &cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return pbi->common.error.error_code; - } - - if(cm->yv12_fb[*ref_fb_ptr].y_height != sd->y_height || - cm->yv12_fb[*ref_fb_ptr].y_width != sd->y_width || - cm->yv12_fb[*ref_fb_ptr].uv_height != sd->uv_height || - cm->yv12_fb[*ref_fb_ptr].uv_width != sd->uv_width){ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } - else{ - /* Find an empty frame buffer. */ - free_fb = get_free_fb(cm); - /* Decrease fb_idx_ref_cnt since it will be increased again in - * ref_cnt_fb() below. */ - cm->fb_idx_ref_cnt[free_fb]--; - - /* Manage the reference counters and copy image. */ - ref_cnt_fb (cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb); - vp8_yv12_copy_frame(sd, &cm->yv12_fb[*ref_fb_ptr]); - } - - return pbi->common.error.error_code; -} - -static int get_free_fb (VP8_COMMON *cm) -{ - int i; - for (i = 0; i < NUM_YV12_BUFFERS; i++) - if (cm->fb_idx_ref_cnt[i] == 0) - break; - - assert(i < NUM_YV12_BUFFERS); - cm->fb_idx_ref_cnt[i] = 1; - return i; -} - -static void ref_cnt_fb (int *buf, int *idx, int new_idx) -{ - if (buf[*idx] > 0) - buf[*idx]--; - - *idx = new_idx; - - buf[new_idx]++; -} - -/* If any buffer copy / swapping is signalled it should be done here. */ -static int swap_frame_buffers (VP8_COMMON *cm) -{ - int err = 0; - - /* The alternate reference frame or golden frame can be updated - * using the new, last, or golden/alt ref frame. If it - * is updated using the newly decoded frame it is a refresh. - * An update using the last or golden/alt ref frame is a copy. - */ - if (cm->copy_buffer_to_arf) - { - int new_fb = 0; - - if (cm->copy_buffer_to_arf == 1) - new_fb = cm->lst_fb_idx; - else if (cm->copy_buffer_to_arf == 2) - new_fb = cm->gld_fb_idx; - else - err = -1; - - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb); - } - - if (cm->copy_buffer_to_gf) - { - int new_fb = 0; - - if (cm->copy_buffer_to_gf == 1) - new_fb = cm->lst_fb_idx; - else if (cm->copy_buffer_to_gf == 2) - new_fb = cm->alt_fb_idx; - else - err = -1; - - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb); - } - - if (cm->refresh_golden_frame) - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx); - - if (cm->refresh_alt_ref_frame) - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx); - - if (cm->refresh_last_frame) - { - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx); - - cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; - } - else - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - return err; -} - -static int check_fragments_for_errors(VP8D_COMP *pbi) -{ - if (!pbi->ec_active && - pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0) - { - VP8_COMMON *cm = &pbi->common; - - /* If error concealment is disabled we won't signal missing frames - * to the decoder. - */ - if (cm->fb_idx_ref_cnt[cm->lst_fb_idx] > 1) - { - /* The last reference shares buffer with another reference - * buffer. Move it to its own buffer before setting it as - * corrupt, otherwise we will make multiple buffers corrupt. - */ - const int prev_idx = cm->lst_fb_idx; - cm->fb_idx_ref_cnt[prev_idx]--; - cm->lst_fb_idx = get_free_fb(cm); - vp8_yv12_copy_frame(&cm->yv12_fb[prev_idx], - &cm->yv12_fb[cm->lst_fb_idx]); - } - /* This is used to signal that we are missing frames. - * We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; - - /* Signal that we have no frame to show. */ - cm->show_frame = 0; - - /* Nothing more to do. */ - return 0; - } - - return 1; -} - -int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, - const uint8_t *source, - int64_t time_stamp) -{ - VP8_COMMON *cm = &pbi->common; - int retcode = -1; - (void)size; - (void)source; - - pbi->common.error.error_code = VPX_CODEC_OK; - - retcode = check_fragments_for_errors(pbi); - if(retcode <= 0) - return retcode; - - cm->new_fb_idx = get_free_fb (cm); - - /* setup reference frames for vp8_decode_frame */ - pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; - pbi->dec_fb_ref[LAST_FRAME] = &cm->yv12_fb[cm->lst_fb_idx]; - pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; - pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; - - if (setjmp(pbi->common.error.jmp)) - { - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; - - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - goto decode_exit; - } - - pbi->common.error.setjmp = 1; - - retcode = vp8_decode_frame(pbi); - - if (retcode < 0) - { - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - pbi->common.error.error_code = VPX_CODEC_ERROR; - goto decode_exit; - } - - if (swap_frame_buffers (cm)) - { - pbi->common.error.error_code = VPX_CODEC_ERROR; - goto decode_exit; - } - - vp8_clear_system_state(); - - if (cm->show_frame) - { - cm->current_video_frame++; - cm->show_frame_mi = cm->mi; - } - - #if CONFIG_ERROR_CONCEALMENT - /* swap the mode infos to storage for future error concealment */ - if (pbi->ec_enabled && pbi->common.prev_mi) - { - MODE_INFO* tmp = pbi->common.prev_mi; - int row, col; - pbi->common.prev_mi = pbi->common.mi; - pbi->common.mi = tmp; - - /* Propagate the segment_ids to the next frame */ - for (row = 0; row < pbi->common.mb_rows; ++row) - { - for (col = 0; col < pbi->common.mb_cols; ++col) - { - const int i = row*pbi->common.mode_info_stride + col; - pbi->common.mi[i].mbmi.segment_id = - pbi->common.prev_mi[i].mbmi.segment_id; - } - } - } -#endif - - pbi->ready_for_new_data = 0; - pbi->last_time_stamp = time_stamp; - -decode_exit: - pbi->common.error.setjmp = 0; - vp8_clear_system_state(); - return retcode; -} -int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) -{ - int ret = -1; - - if (pbi->ready_for_new_data == 1) - return ret; - - /* ie no raw frame to show!!! */ - if (pbi->common.show_frame == 0) - return ret; - - pbi->ready_for_new_data = 1; - *time_stamp = pbi->last_time_stamp; - *time_end_stamp = 0; - -#if CONFIG_POSTPROC - ret = vp8_post_proc_frame(&pbi->common, sd, flags); -#else - (void)flags; - - if (pbi->common.frame_to_show) - { - *sd = *pbi->common.frame_to_show; - sd->y_width = pbi->common.Width; - sd->y_height = pbi->common.Height; - sd->uv_height = pbi->common.Height / 2; - ret = 0; - } - else - { - ret = -1; - } - -#endif /*!CONFIG_POSTPROC*/ - vp8_clear_system_state(); - return ret; -} - - -/* This function as written isn't decoder specific, but the encoder has - * much faster ways of computing this, so it's ok for it to live in a - * decode specific file. - */ -int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ) -{ - const MODE_INFO *mi = oci->mi; - int mb_row, mb_col; - - for (mb_row = 0; mb_row < oci->mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < oci->mb_cols; mb_col++,mi++) - { - if( mi->mbmi.ref_frame == ref_frame) - return 1; - } - mi++; - } - return 0; - -} - -int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf) -{ - if(!fb->use_frame_threads) - { - /* decoder instance for single thread mode */ - fb->pbi[0] = create_decompressor(oxcf); - if(!fb->pbi[0]) - return VPX_CODEC_ERROR; - -#if CONFIG_MULTITHREAD - /* enable row-based threading only when use_frame_threads - * is disabled */ - fb->pbi[0]->max_threads = oxcf->max_threads; - vp8_decoder_create_threads(fb->pbi[0]); -#endif - } - else - { - /* TODO : create frame threads and decoder instances for each - * thread here */ - } - - return VPX_CODEC_OK; -} - -int vp8_remove_decoder_instances(struct frame_buffers *fb) -{ - if(!fb->use_frame_threads) - { - VP8D_COMP *pbi = fb->pbi[0]; - - if (!pbi) - return VPX_CODEC_ERROR; -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); - vp8_decoder_remove_threads(pbi); -#endif - - /* decoder instance for single thread mode */ - remove_decompressor(pbi); - } - else - { - /* TODO : remove frame threads and decoder instances for each - * thread here */ - } - - return VPX_CODEC_OK; -} diff --git a/thirdparty/libvpx/vp8/decoder/onyxd_int.h b/thirdparty/libvpx/vp8/decoder/onyxd_int.h deleted file mode 100644 index 313fe01c07..0000000000 --- a/thirdparty/libvpx/vp8/decoder/onyxd_int.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_DECODER_ONYXD_INT_H_ -#define VP8_DECODER_ONYXD_INT_H_ - -#include "vpx_config.h" -#include "vp8/common/onyxd.h" -#include "treereader.h" -#include "vp8/common/onyxc_int.h" -#include "vp8/common/threading.h" - -#if CONFIG_ERROR_CONCEALMENT -#include "ec_types.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - int ithread; - void *ptr1; - void *ptr2; -} DECODETHREAD_DATA; - -typedef struct -{ - MACROBLOCKD mbd; -} MB_ROW_DEC; - - -typedef struct -{ - int enabled; - unsigned int count; - const unsigned char *ptrs[MAX_PARTITIONS]; - unsigned int sizes[MAX_PARTITIONS]; -} FRAGMENT_DATA; - -#define MAX_FB_MT_DEC 32 - -struct frame_buffers -{ - /* - * this struct will be populated with frame buffer management - * info in future commits. */ - - /* enable/disable frame-based threading */ - int use_frame_threads; - - /* decoder instances */ - struct VP8D_COMP *pbi[MAX_FB_MT_DEC]; - -}; - -typedef struct VP8D_COMP -{ - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; - - DECLARE_ALIGNED(16, VP8_COMMON, common); - - /* the last partition will be used for the modes/mvs */ - vp8_reader mbc[MAX_PARTITIONS]; - - VP8D_CONFIG oxcf; - - FRAGMENT_DATA fragments; - -#if CONFIG_MULTITHREAD - /* variable for threading */ - - int b_multithreaded_rd; - int max_threads; - int current_mb_col_main; - unsigned int decoding_thread_count; - int allocated_decoding_thread_count; - - int mt_baseline_filter_level[MAX_MB_SEGMENTS]; - int sync_range; - int *mt_current_mb_col; /* Each row remembers its already decoded column. */ - pthread_mutex_t *pmutex; - pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */ - - unsigned char **mt_yabove_row; /* mb_rows x width */ - unsigned char **mt_uabove_row; - unsigned char **mt_vabove_row; - unsigned char **mt_yleft_col; /* mb_rows x 16 */ - unsigned char **mt_uleft_col; /* mb_rows x 8 */ - unsigned char **mt_vleft_col; /* mb_rows x 8 */ - - MB_ROW_DEC *mb_row_di; - DECODETHREAD_DATA *de_thread_data; - - pthread_t *h_decoding_thread; - sem_t *h_event_start_decoding; - sem_t h_event_end_decoding; - /* end of threading data */ -#endif - - int64_t last_time_stamp; - int ready_for_new_data; - - vp8_prob prob_intra; - vp8_prob prob_last; - vp8_prob prob_gf; - vp8_prob prob_skip_false; - -#if CONFIG_ERROR_CONCEALMENT - MB_OVERLAP *overlaps; - /* the mb num from which modes and mvs (first partition) are corrupt */ - unsigned int mvs_corrupt_from_mb; -#endif - int ec_enabled; - int ec_active; - int decoded_key_frame; - int independent_partitions; - int frame_corrupt_residual; - - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; -} VP8D_COMP; - -int vp8_decode_frame(VP8D_COMP *cpi); - -int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf); -int vp8_remove_decoder_instances(struct frame_buffers *fb); - -#if CONFIG_DEBUG -#define CHECK_MEM_ERROR(lval,expr) do {\ - lval = (expr); \ - if(!lval) \ - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\ - "Failed to allocate "#lval" at %s:%d", \ - __FILE__,__LINE__);\ - } while(0) -#else -#define CHECK_MEM_ERROR(lval,expr) do {\ - lval = (expr); \ - if(!lval) \ - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\ - "Failed to allocate "#lval);\ - } while(0) -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_ONYXD_INT_H_ diff --git a/thirdparty/libvpx/vp8/decoder/threading.c b/thirdparty/libvpx/vp8/decoder/threading.c deleted file mode 100644 index 3c1b8387ec..0000000000 --- a/thirdparty/libvpx/vp8/decoder/threading.c +++ /dev/null @@ -1,928 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1 -# include -#endif -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/threading.h" - -#include "vp8/common/loopfilter.h" -#include "vp8/common/extend.h" -#include "vpx_ports/vpx_timer.h" -#include "detokenize.h" -#include "vp8/common/reconintra4x4.h" -#include "vp8/common/reconinter.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/setupintrarecon.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif - -#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) -#define CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ - CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ - memset((p), 0, (n) * sizeof(*(p))); \ -} while (0) - - -void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); - -static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) -{ - VP8_COMMON *const pc = & pbi->common; - int i; - - for (i = 0; i < count; i++) - { - MACROBLOCKD *mbd = &mbrd[i].mbd; - mbd->subpixel_predict = xd->subpixel_predict; - mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; - mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; - mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - - mbd->frame_type = pc->frame_type; - mbd->pre = xd->pre; - mbd->dst = xd->dst; - - mbd->segmentation_enabled = xd->segmentation_enabled; - mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); - - /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ - memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); - /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ - memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); - /*unsigned char mode_ref_lf_delta_enabled; - unsigned char mode_ref_lf_delta_update;*/ - mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; - mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; - - mbd->current_bc = &pbi->mbc[0]; - - memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); - memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); - memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); - memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); - - mbd->fullpixel_mask = 0xffffffff; - - if (pc->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; - - } - - for (i = 0; i < pc->mb_rows; i++) - pbi->mt_current_mb_col[i] = -1; -} - -static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - unsigned int mb_idx) -{ - MB_PREDICTION_MODE mode; - int i; -#if CONFIG_ERROR_CONCEALMENT - int corruption_detected = 0; -#else - (void)mb_idx; -#endif - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else if (!vp8dx_bool_error(xd->current_bc)) - { - int eobtotal; - eobtotal = vp8_decode_mb_tokens(pbi, xd); - - /* Special case: Force the loopfilter to skip when eobtotal is zero */ - xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); - } - - mode = xd->mode_info_context->mbmi.mode; - - if (xd->segmentation_enabled) - vp8_mb_init_dequantizer(pbi, xd); - - -#if CONFIG_ERROR_CONCEALMENT - - if(pbi->ec_active) - { - int throw_residual; - /* When we have independent partitions we can apply residual even - * though other partitions within the frame are corrupt. - */ - throw_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual); - throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); - - if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) - { - /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. - */ - pbi->frame_corrupt_residual = 1; - memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - corruption_detected = 1; - - /* force idct to be skipped for B_PRED and use the - * prediction only for reconstruction - * */ - memset(xd->eobs, 0, 25); - } - } -#endif - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8_build_intra_predictors_mbuv_s(xd, - xd->recon_above[1], - xd->recon_above[2], - xd->recon_left[1], - xd->recon_left[2], - xd->recon_left_stride[1], - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride); - - if (mode != B_PRED) - { - vp8_build_intra_predictors_mby_s(xd, - xd->recon_above[0], - xd->recon_left[0], - xd->recon_left_stride[0], - xd->dst.y_buffer, - xd->dst.y_stride); - } - else - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) - memset(xd->eobs, 0, 25); - - intra_prediction_down_copy(xd, xd->recon_above[0] + 16); - - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; - unsigned char *dst = xd->dst.y_buffer + b->offset; - B_PREDICTION_MODE b_mode = - xd->mode_info_context->bmi[i].as_mode; - unsigned char *Above; - unsigned char *yleft; - int left_stride; - unsigned char top_left; - - /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ - if (i < 4 && pbi->common.filter_level) - Above = xd->recon_above[0] + b->offset; - else - Above = dst - dst_stride; - - if (i%4==0 && pbi->common.filter_level) - { - yleft = xd->recon_left[0] + i; - left_stride = 1; - } - else - { - yleft = dst - 1; - left_stride = dst_stride; - } - - if ((i==4 || i==8 || i==12) && pbi->common.filter_level) - top_left = *(xd->recon_left[0] + i - 1); - else - top_left = Above[-1]; - - vp8_intra4x4_predict(Above, yleft, left_stride, - b_mode, dst, dst_stride, top_left); - - if (xd->eobs[i] ) - { - if (xd->eobs[i] > 1) - { - vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], - dst, dst_stride, dst, dst_stride); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - } - } - } - } - else - { - vp8_build_inter_predictors_mb(xd); - } - - -#if CONFIG_ERROR_CONCEALMENT - if (corruption_detected) - { - return; - } -#endif - - if(!xd->mode_info_context->mbmi.mb_skip_coeff) - { - /* dequantization and idct */ - if (mode != B_PRED) - { - short *DQC = xd->dequant_y1; - - if (mode != SPLITMV) - { - BLOCKD *b = &xd->block[24]; - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_dequantize_b(b, xd->dequant_y2); - - vp8_short_inv_walsh4x4(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); - } - else - { - b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; - vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - - /* override the dc dequant constant in order to preserve the - * dc components - */ - DQC = xd->dequant_y1_dc; - } - - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); - } - - vp8_dequant_idct_add_uv_block - (xd->qcoeff+16*16, xd->dequant_uv, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); - } -} - -static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) -{ - const int *last_row_current_mb_col; - int *current_mb_col; - int mb_row; - VP8_COMMON *pc = &pbi->common; - const int nsync = pbi->sync_range; - const int first_row_no_sync_above = pc->mb_cols + nsync; - int num_part = 1 << pbi->common.multi_token_partition; - int last_mb_row = start_mb_row; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; - - int recon_y_stride = yv12_fb_new->y_stride; - int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - int i; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - - for(i = 1; i < MAX_REF_FRAMES; i++) - { - YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - - ref_buffer[i][0] = this_fb->y_buffer; - ref_buffer[i][1] = this_fb->u_buffer; - ref_buffer[i][2] = this_fb->v_buffer; - - ref_fb_corrupted[i] = this_fb->corrupted; - } - - dst_buffer[0] = yv12_fb_new->y_buffer; - dst_buffer[1] = yv12_fb_new->u_buffer; - dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = (start_mb_row != 0); - - xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; - xd->mode_info_stride = pc->mode_info_stride; - - for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) - { - int recon_yoffset, recon_uvoffset; - int mb_col; - int filter_level; - loop_filter_info_n *lfi_n = &pc->lf_info; - - /* save last row processed by this thread */ - last_mb_row = mb_row; - /* select bool coder for current partition */ - xd->current_bc = &pbi->mbc[mb_row%num_part]; - - if (mb_row > 0) - last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; - else - last_row_current_mb_col = &first_row_no_sync_above; - - current_mb_col = &pbi->mt_current_mb_col[mb_row]; - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - - /* reset contexts */ - xd->above_context = pc->above_context; - memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - - xd->left_available = 0; - - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - if (pbi->common.filter_level) - { - xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0*16 +32; - xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0*8 +16; - xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0*8 +16; - - xd->recon_left[0] = pbi->mt_yleft_col[mb_row]; - xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; - xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; - - /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = 1; - xd->recon_left_stride[1] = 1; - } - else - { - xd->recon_above[0] = dst_buffer[0] + recon_yoffset; - xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; - xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; - - xd->recon_left[0] = xd->recon_above[0] - 1; - xd->recon_left[1] = xd->recon_above[1] - 1; - xd->recon_left[2] = xd->recon_above[2] - 1; - - xd->recon_above[0] -= xd->dst.y_stride; - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - - /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - - setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], - xd->recon_left[2], xd->dst.y_stride, - xd->dst.uv_stride); - } - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { - if (((mb_col - 1) % nsync) == 0) { - pthread_mutex_t *mutex = &pbi->pmutex[mb_row]; - protected_write(mutex, current_mb_col, mb_col - 1); - } - - if (mb_row && !(mb_col & (nsync - 1))) { - pthread_mutex_t *mutex = &pbi->pmutex[mb_row-1]; - sync_read(mutex, mb_col, last_row_current_mb_col, nsync); - } - - /* Distance of MB to the various image edges. - * These are specified to 8th pel as they are always - * compared to values that are in 1/8th pel units. - */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - - #if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = - (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - (xd->mode_info_context->mbmi.ref_frame == - INTRA_FRAME) && - corrupt_residual) - { - /* We have an intra block with corrupt - * coefficients, better to conceal with an inter - * block. - * Interpolate MVs from neighboring MBs - * - * Note that for the first mb with corrupt - * residual in a frame, we might not discover - * that before decoding the residual. That - * happens after this check, and therefore no - * inter concealment will be done. - */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols); - } - } - #endif - - - xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; - xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; - xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; - - xd->pre.y_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset; - xd->pre.u_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset; - xd->pre.v_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset; - - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - - mt_decode_macroblock(pbi, xd, 0); - - xd->left_available = 1; - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - xd->recon_above[0] += 16; - xd->recon_above[1] += 8; - xd->recon_above[2] += 8; - - if (!pbi->common.filter_level) - { - xd->recon_left[0] += 16; - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - } - - if (pbi->common.filter_level) - { - int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV && - xd->mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; - const int seg = xd->mode_info_context->mbmi.segment_id; - const int ref_frame = xd->mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if( mb_row != pc->mb_rows-1 ) - { - /* Save decoded MB last row data for next-row decoding */ - memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); - memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); - memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); - } - - /* save left_col for next MB decoding */ - if(mb_col != pc->mb_cols-1) - { - MODE_INFO *next = xd->mode_info_context +1; - - if (next->mbmi.ref_frame == INTRA_FRAME) - { - for (i = 0; i < 16; i++) - pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; - for (i = 0; i < 8; i++) - { - pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; - pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7]; - } - } - } - - /* loopfilter on this macroblock. */ - if (filter_level) - { - if(pc->filter_type == NORMAL_LOOPFILTER) - { - loop_filter_info lfi; - FRAME_TYPE frame_type = pc->frame_type; - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - } - else - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - } - } - - } - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - } - - /* adjust to the next row of mbs */ - if (pbi->common.filter_level) - { - if(mb_row != pc->mb_rows-1) - { - int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; - int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); - - for (i = 0; i < 4; i++) - { - pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; - pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; - pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; - } - } - } - else - vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - /* last MB of row is ready just after extension is done */ - protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - - /* since we have multithread */ - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - } - - /* signal end of frame decoding if this thread processed the last mb_row */ - if (last_mb_row == (pc->mb_rows - 1)) - sem_post(&pbi->h_event_end_decoding); - -} - - -static THREAD_FUNCTION thread_decoding_proc(void *p_data) -{ - int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; - VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); - MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT_PLANES mb_row_left_context; - - while (1) - { - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) - break; - - if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) - { - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) - break; - else - { - MACROBLOCKD *xd = &mbrd->mbd; - xd->left_context = &mb_row_left_context; - - mt_decode_mb_rows(pbi, xd, ithread+1); - } - } - } - - return 0 ; -} - - -void vp8_decoder_create_threads(VP8D_COMP *pbi) -{ - int core_count = 0; - unsigned int ithread; - - pbi->b_multithreaded_rd = 0; - pbi->allocated_decoding_thread_count = 0; - pthread_mutex_init(&pbi->mt_mutex, NULL); - - /* limit decoding threads to the max number of token partitions */ - core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads; - - /* limit decoding threads to the available cores */ - if (core_count > pbi->common.processor_core_count) - core_count = pbi->common.processor_core_count; - - if (core_count > 1) - { - pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count - 1; - - CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); - CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); - CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); - CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); - - for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) - { - sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); - - vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); - - pbi->de_thread_data[ithread].ithread = ithread; - pbi->de_thread_data[ithread].ptr1 = (void *)pbi; - pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; - - pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread])); - } - - sem_init(&pbi->h_event_end_decoding, 0, 0); - - pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; - } -} - - -void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) -{ - int i; - - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) - { - /* De-allocate mutex */ - if (pbi->pmutex != NULL) { - for (i = 0; i < mb_rows; i++) { - pthread_mutex_destroy(&pbi->pmutex[i]); - } - vpx_free(pbi->pmutex); - pbi->pmutex = NULL; - } - - vpx_free(pbi->mt_current_mb_col); - pbi->mt_current_mb_col = NULL ; - - /* Free above_row buffers. */ - if (pbi->mt_yabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_yabove_row[i]); - pbi->mt_yabove_row[i] = NULL ; - } - vpx_free(pbi->mt_yabove_row); - pbi->mt_yabove_row = NULL ; - } - - if (pbi->mt_uabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_uabove_row[i]); - pbi->mt_uabove_row[i] = NULL ; - } - vpx_free(pbi->mt_uabove_row); - pbi->mt_uabove_row = NULL ; - } - - if (pbi->mt_vabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_vabove_row[i]); - pbi->mt_vabove_row[i] = NULL ; - } - vpx_free(pbi->mt_vabove_row); - pbi->mt_vabove_row = NULL ; - } - - /* Free left_col buffers. */ - if (pbi->mt_yleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_yleft_col[i]); - pbi->mt_yleft_col[i] = NULL ; - } - vpx_free(pbi->mt_yleft_col); - pbi->mt_yleft_col = NULL ; - } - - if (pbi->mt_uleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_uleft_col[i]); - pbi->mt_uleft_col[i] = NULL ; - } - vpx_free(pbi->mt_uleft_col); - pbi->mt_uleft_col = NULL ; - } - - if (pbi->mt_vleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_vleft_col[i]); - pbi->mt_vleft_col[i] = NULL ; - } - vpx_free(pbi->mt_vleft_col); - pbi->mt_vleft_col = NULL ; - } - } -} - - -void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) -{ - VP8_COMMON *const pc = & pbi->common; - int i; - int uv_width; - - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) - { - vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); - - /* our internal buffers are always multiples of 16 */ - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if (width < 640) pbi->sync_range = 1; - else if (width <= 1280) pbi->sync_range = 8; - else if (width <= 2560) pbi->sync_range =16; - else pbi->sync_range = 32; - - uv_width = width >>1; - - /* Allocate mutex */ - CHECK_MEM_ERROR(pbi->pmutex, vpx_malloc(sizeof(*pbi->pmutex) * - pc->mb_rows)); - if (pbi->pmutex) { - for (i = 0; i < pc->mb_rows; i++) { - pthread_mutex_init(&pbi->pmutex[i], NULL); - } - } - - /* Allocate an int for each mb row. */ - CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); - - /* Allocate memory for above_row buffers. */ - CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)))); - - CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - - CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - - /* Allocate memory for left_col buffers. */ - CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); - - CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - - CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - } -} - - -void vp8_decoder_remove_threads(VP8D_COMP *pbi) -{ - /* shutdown MB Decoding thread; */ - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) - { - int i; - - protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0); - - /* allow all threads to exit */ - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_post(&pbi->h_event_start_decoding[i]); - pthread_join(pbi->h_decoding_thread[i], NULL); - } - - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_destroy(&pbi->h_event_start_decoding[i]); - } - - sem_destroy(&pbi->h_event_end_decoding); - - vpx_free(pbi->h_decoding_thread); - pbi->h_decoding_thread = NULL; - - vpx_free(pbi->h_event_start_decoding); - pbi->h_event_start_decoding = NULL; - - vpx_free(pbi->mb_row_di); - pbi->mb_row_di = NULL ; - - vpx_free(pbi->de_thread_data); - pbi->de_thread_data = NULL; - } - pthread_mutex_destroy(&pbi->mt_mutex); -} - -void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - VP8_COMMON *pc = &pbi->common; - unsigned int i; - int j; - - int filter_level = pc->filter_level; - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - if (filter_level) - { - /* Set above_row buffer to 127 for decoding first MB row */ - memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); - memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - - for (j=1; jmb_rows; j++) - { - memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); - memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - } - - /* Set left_col to 129 initially */ - for (j=0; jmb_rows; j++) - { - memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); - memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); - memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); - } - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); - } - else - vp8_setup_intra_recon_top_line(yv12_fb_new); - - setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); - - for (i = 0; i < pbi->decoding_thread_count; i++) - sem_post(&pbi->h_event_start_decoding[i]); - - mt_decode_mb_rows(pbi, xd, 0); - - sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ -} diff --git a/thirdparty/libvpx/vp8/decoder/treereader.h b/thirdparty/libvpx/vp8/decoder/treereader.h deleted file mode 100644 index f7d23c3698..0000000000 --- a/thirdparty/libvpx/vp8/decoder/treereader.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_DECODER_TREEREADER_H_ -#define VP8_DECODER_TREEREADER_H_ - -#include "./vpx_config.h" -#include "vp8/common/treecoder.h" -#include "dboolhuff.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef BOOL_DECODER vp8_reader; - -#define vp8_read vp8dx_decode_bool -#define vp8_read_literal vp8_decode_value -#define vp8_read_bit(R) vp8_read(R, vp8_prob_half) - - -/* Intent of tree data structure is to make decoding trivial. */ - -static INLINE int vp8_treed_read( - vp8_reader *const r, /* !!! must return a 0 or 1 !!! */ - vp8_tree t, - const vp8_prob *const p -) -{ - register vp8_tree_index i = 0; - - while ((i = t[ i + vp8_read(r, p[i>>1])]) > 0) ; - - return -i; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_TREEREADER_H_ diff --git a/thirdparty/libvpx/vp8/vp8_dx_iface.c b/thirdparty/libvpx/vp8/vp8_dx_iface.c deleted file mode 100644 index fc9288d62b..0000000000 --- a/thirdparty/libvpx/vp8/vp8_dx_iface.c +++ /dev/null @@ -1,828 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include -#include -#include -#include "./vp8_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "vpx/vpx_decoder.h" -#include "vpx/vp8dx.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx_version.h" -#include "common/alloccommon.h" -#include "common/common.h" -#include "common/onyxd.h" -#include "decoder/onyxd_int.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#if CONFIG_ERROR_CONCEALMENT -#include "decoder/error_concealment.h" -#endif -#include "decoder/decoderthreading.h" - -#define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) -#define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \ - VPX_CODEC_CAP_ERROR_CONCEALMENT : 0) - -typedef vpx_codec_stream_info_t vp8_stream_info_t; - -/* Structures for handling memory allocations */ -typedef enum -{ - VP8_SEG_ALG_PRIV = 256, - VP8_SEG_MAX -} mem_seg_id_t; -#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) - -struct vpx_codec_alg_priv -{ - vpx_codec_priv_t base; - vpx_codec_dec_cfg_t cfg; - vp8_stream_info_t si; - int decoder_init; - int postproc_cfg_set; - vp8_postproc_cfg_t postproc_cfg; -#if CONFIG_POSTPROC_VISUALIZER - unsigned int dbg_postproc_flag; - int dbg_color_ref_frame_flag; - int dbg_color_mb_modes_flag; - int dbg_color_b_modes_flag; - int dbg_display_mv_flag; -#endif - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - vpx_image_t img; - int img_setup; - struct frame_buffers yv12_frame_buffers; - void *user_priv; - FRAGMENT_DATA fragments; -}; - -static int vp8_init_ctx(vpx_codec_ctx_t *ctx) -{ - vpx_codec_alg_priv_t *priv = - (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); - if (!priv) return 1; - - ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->init_flags = ctx->init_flags; - - priv->si.sz = sizeof(priv->si); - priv->decrypt_cb = NULL; - priv->decrypt_state = NULL; - - if (ctx->config.dec) - { - /* Update the reference to the config structure to an internal copy. */ - priv->cfg = *ctx->config.dec; - ctx->config.dec = &priv->cfg; - } - - return 0; -} - -static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) -{ - vpx_codec_err_t res = VPX_CODEC_OK; - vpx_codec_alg_priv_t *priv = NULL; - (void) data; - - vp8_rtcd(); - vpx_dsp_rtcd(); - vpx_scale_rtcd(); - - /* This function only allocates space for the vpx_codec_alg_priv_t - * structure. More memory may be required at the time the stream - * information becomes known. - */ - if (!ctx->priv) { - if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR; - priv = (vpx_codec_alg_priv_t *)ctx->priv; - - /* initialize number of fragments to zero */ - priv->fragments.count = 0; - /* is input fragments enabled? */ - priv->fragments.enabled = - (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); - - /*post processing level initialized to do nothing */ - } else { - priv = (vpx_codec_alg_priv_t *)ctx->priv; - } - - priv->yv12_frame_buffers.use_frame_threads = - (ctx->priv->init_flags & VPX_CODEC_USE_FRAME_THREADING); - - /* for now, disable frame threading */ - priv->yv12_frame_buffers.use_frame_threads = 0; - - if (priv->yv12_frame_buffers.use_frame_threads && - ((ctx->priv->init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT) || - (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) { - /* row-based threading, error concealment, and input fragments will - * not be supported when using frame-based threading */ - res = VPX_CODEC_INVALID_PARAM; - } - - return res; -} - -static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx) -{ - vp8_remove_decoder_instances(&ctx->yv12_frame_buffers); - - vpx_free(ctx); - - return VPX_CODEC_OK; -} - -static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) -{ - vpx_codec_err_t res = VPX_CODEC_OK; - - assert(data != NULL); - - if(data + data_sz <= data) - { - res = VPX_CODEC_INVALID_PARAM; - } - else - { - /* Parse uncompresssed part of key frame header. - * 3 bytes:- including version, frame type and an offset - * 3 bytes:- sync code (0x9d, 0x01, 0x2a) - * 4 bytes:- including image width and height in the lowest 14 bits - * of each 2-byte value. - */ - uint8_t clear_buffer[10]; - const uint8_t *clear = data; - if (decrypt_cb) - { - int n = VPXMIN(sizeof(clear_buffer), data_sz); - decrypt_cb(decrypt_state, data, clear_buffer, n); - clear = clear_buffer; - } - si->is_kf = 0; - - if (data_sz >= 10 && !(clear[0] & 0x01)) /* I-Frame */ - { - si->is_kf = 1; - - /* vet via sync code */ - if (clear[3] != 0x9d || clear[4] != 0x01 || clear[5] != 0x2a) - return VPX_CODEC_UNSUP_BITSTREAM; - - si->w = (clear[6] | (clear[7] << 8)) & 0x3fff; - si->h = (clear[8] | (clear[9] << 8)) & 0x3fff; - - /*printf("w=%d, h=%d\n", si->w, si->h);*/ - if (!(si->h | si->w)) - res = VPX_CODEC_UNSUP_BITSTREAM; - } - else - { - res = VPX_CODEC_UNSUP_BITSTREAM; - } - } - - return res; -} - -static vpx_codec_err_t vp8_peek_si(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) { - return vp8_peek_si_internal(data, data_sz, si, NULL, NULL); -} - -static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) -{ - - unsigned int sz; - - if (si->sz >= sizeof(vp8_stream_info_t)) - sz = sizeof(vp8_stream_info_t); - else - sz = sizeof(vpx_codec_stream_info_t); - - memcpy(si, &ctx->si, sz); - si->sz = sz; - - return VPX_CODEC_OK; -} - - -static vpx_codec_err_t -update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) -{ - vpx_codec_err_t res; - - if ((res = error->error_code)) - ctx->base.err_detail = error->has_detail - ? error->detail - : NULL; - - return res; -} - -static void yuvconfig2image(vpx_image_t *img, - const YV12_BUFFER_CONFIG *yv12, - void *user_priv) -{ - /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ - img->fmt = VPX_IMG_FMT_I420; - img->w = yv12->y_stride; - img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; - img->d_w = img->r_w = yv12->y_width; - img->d_h = img->r_h = yv12->y_height; - img->x_chroma_shift = 1; - img->y_chroma_shift = 1; - img->planes[VPX_PLANE_Y] = yv12->y_buffer; - img->planes[VPX_PLANE_U] = yv12->u_buffer; - img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = yv12->y_stride; - img->stride[VPX_PLANE_U] = yv12->uv_stride; - img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; - img->bit_depth = 8; - img->bps = 12; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} - -static int -update_fragments(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - vpx_codec_err_t *res) -{ - *res = VPX_CODEC_OK; - - if (ctx->fragments.count == 0) - { - /* New frame, reset fragment pointers and sizes */ - memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); - memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); - } - if (ctx->fragments.enabled && !(data == NULL && data_sz == 0)) - { - /* Store a pointer to this fragment and return. We haven't - * received the complete frame yet, so we will wait with decoding. - */ - ctx->fragments.ptrs[ctx->fragments.count] = data; - ctx->fragments.sizes[ctx->fragments.count] = data_sz; - ctx->fragments.count++; - if (ctx->fragments.count > (1 << EIGHT_PARTITION) + 1) - { - ctx->fragments.count = 0; - *res = VPX_CODEC_INVALID_PARAM; - return -1; - } - return 0; - } - - if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) - { - return 0; - } - - if (!ctx->fragments.enabled) - { - ctx->fragments.ptrs[0] = data; - ctx->fragments.sizes[0] = data_sz; - ctx->fragments.count = 1; - } - - return 1; -} - -static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) -{ - vpx_codec_err_t res = VPX_CODEC_OK; - unsigned int resolution_change = 0; - unsigned int w, h; - - if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) - { - return 0; - } - - /* Update the input fragment data */ - if(update_fragments(ctx, data, data_sz, &res) <= 0) - return res; - - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. - */ - w = ctx->si.w; - h = ctx->si.h; - - res = vp8_peek_si_internal(ctx->fragments.ptrs[0], ctx->fragments.sizes[0], - &ctx->si, ctx->decrypt_cb, ctx->decrypt_state); - - if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) - { - /* the peek function returns an error for non keyframes, however for - * this case, it is not an error */ - res = VPX_CODEC_OK; - } - - if(!ctx->decoder_init && !ctx->si.is_kf) - res = VPX_CODEC_UNSUP_BITSTREAM; - - if ((ctx->si.h != h) || (ctx->si.w != w)) - resolution_change = 1; - - /* Initialize the decoder instance on the first frame*/ - if (!res && !ctx->decoder_init) - { - VP8D_CONFIG oxcf; - - oxcf.Width = ctx->si.w; - oxcf.Height = ctx->si.h; - oxcf.Version = 9; - oxcf.postprocess = 0; - oxcf.max_threads = ctx->cfg.threads; - oxcf.error_concealment = - (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT); - - /* If postprocessing was enabled by the application and a - * configuration has not been provided, default it. - */ - if (!ctx->postproc_cfg_set - && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { - ctx->postproc_cfg.post_proc_flag = - VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE; - ctx->postproc_cfg.deblocking_level = 4; - ctx->postproc_cfg.noise_level = 0; - } - - res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf); - ctx->decoder_init = 1; - } - - /* Set these even if already initialized. The caller may have changed the - * decrypt config between frames. - */ - if (ctx->decoder_init) { - ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb; - ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state; - } - - if (!res) - { - VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; - if (resolution_change) - { - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; -#if CONFIG_MULTITHREAD - int i; -#endif - pc->Width = ctx->si.w; - pc->Height = ctx->si.h; - { - int prev_mb_rows = pc->mb_rows; - - if (setjmp(pbi->common.error.jmp)) - { - pbi->common.error.setjmp = 0; - vp8_clear_system_state(); - /* same return value as used in vp8dx_receive_compressed_data */ - return -1; - } - - pbi->common.error.setjmp = 1; - - if (pc->Width <= 0) - { - pc->Width = w; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame width"); - } - - if (pc->Height <= 0) - { - pc->Height = h; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame height"); - } - - if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); - - xd->pre = pc->yv12_fb[pc->lst_fb_idx]; - xd->dst = pc->yv12_fb[pc->new_fb_idx]; - -#if CONFIG_MULTITHREAD - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; - vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); - } -#endif - vp8_build_block_doffsets(&pbi->mb); - - /* allocate memory for last frame MODE_INFO array */ -#if CONFIG_ERROR_CONCEALMENT - - if (pbi->ec_enabled) - { - /* old prev_mip was released by vp8_de_alloc_frame_buffers() - * called in vp8_alloc_frame_buffers() */ - pc->prev_mip = vpx_calloc( - (pc->mb_cols + 1) * (pc->mb_rows + 1), - sizeof(MODE_INFO)); - - if (!pc->prev_mip) - { - vp8_de_alloc_frame_buffers(pc); - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate" - "last frame MODE_INFO array"); - } - - pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; - - if (vp8_alloc_overlap_lists(pbi)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate overlap lists " - "for error concealment"); - } - -#endif - -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); -#else - (void)prev_mb_rows; -#endif - } - - pbi->common.error.setjmp = 0; - - /* required to get past the first get_free_fb() call */ - pbi->common.fb_idx_ref_cnt[0] = 0; - } - - /* update the pbi fragment data */ - pbi->fragments = ctx->fragments; - - ctx->user_priv = user_priv; - if (vp8dx_receive_compressed_data(pbi, data_sz, data, deadline)) - { - res = update_error_state(ctx, &pbi->common.error); - } - - /* get ready for the next series of fragments */ - ctx->fragments.count = 0; - } - - return res; -} - -static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) -{ - vpx_image_t *img = NULL; - - /* iter acts as a flip flop, so an image is only returned on the first - * call to get_frame. - */ - if (!(*iter) && ctx->yv12_frame_buffers.pbi[0]) - { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp8_ppflags_t flags; - vp8_zero(flags); - - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - { - flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag -#if CONFIG_POSTPROC_VISUALIZER - - | ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0) - | ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0) -#endif - ; - flags.deblocking_level = ctx->postproc_cfg.deblocking_level; - flags.noise_level = ctx->postproc_cfg.noise_level; -#if CONFIG_POSTPROC_VISUALIZER - flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag; - flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags.display_mv_flag = ctx->dbg_display_mv_flag; -#endif - } - - if (0 == vp8dx_get_raw_frame(ctx->yv12_frame_buffers.pbi[0], &sd, - &time_stamp, &time_end_stamp, &flags)) - { - yuvconfig2image(&ctx->img, &sd, ctx->user_priv); - - img = &ctx->img; - *iter = img; - } - } - - return img; -} - -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) -{ - const int y_w = img->d_w; - const int y_h = img->d_h; - const int uv_w = (img->d_w + 1) / 2; - const int uv_h = (img->d_h + 1) / 2; - vpx_codec_err_t res = VPX_CODEC_OK; - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = y_w; - yv12->y_crop_height = y_h; - yv12->y_width = y_w; - yv12->y_height = y_h; - yv12->uv_crop_width = uv_w; - yv12->uv_crop_height = uv_h; - yv12->uv_width = uv_w; - yv12->uv_height = uv_h; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - - yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2; - return res; -} - - -static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - - if (data && !ctx->yv12_frame_buffers.use_frame_threads) - { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; - YV12_BUFFER_CONFIG sd; - - image2yuvconfig(&frame->img, &sd); - - return vp8dx_set_reference(ctx->yv12_frame_buffers.pbi[0], - frame->frame_type, &sd); - } - else - return VPX_CODEC_INVALID_PARAM; - -} - -static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - - if (data && !ctx->yv12_frame_buffers.use_frame_threads) - { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; - YV12_BUFFER_CONFIG sd; - - image2yuvconfig(&frame->img, &sd); - - return vp8dx_get_reference(ctx->yv12_frame_buffers.pbi[0], - frame->frame_type, &sd); - } - else - return VPX_CODEC_INVALID_PARAM; - -} - -static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, - va_list args) -{ -#if CONFIG_POSTPROC - vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - - if (data) - { - ctx->postproc_cfg_set = 1; - ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; - -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - - -static vpx_codec_err_t vp8_set_dbg_color_ref_frame(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_color_ref_frame_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_set_dbg_color_mb_modes(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_color_mb_modes_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_set_dbg_color_b_modes(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_color_b_modes_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_set_dbg_display_mv(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_display_mv_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int *update_info = va_arg(args, int *); - - if (update_info && !ctx->yv12_frame_buffers.use_frame_threads) - { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; - - *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME - + pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME - + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME; - - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; -} - -extern int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ); -static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int *ref_info = va_arg(args, int *); - - if (ref_info && !ctx->yv12_frame_buffers.use_frame_threads) - { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; - VP8_COMMON *oci = &pbi->common; - *ref_info = - (vp8dx_references_buffer( oci, ALTREF_FRAME )?VP8_ALTR_FRAME:0) | - (vp8dx_references_buffer( oci, GOLDEN_FRAME )?VP8_GOLD_FRAME:0) | - (vp8dx_references_buffer( oci, LAST_FRAME )?VP8_LAST_FRAME:0); - - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - - int *corrupted = va_arg(args, int *); - VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; - - if (corrupted && pbi) - { - const YV12_BUFFER_CONFIG *const frame = pbi->common.frame_to_show; - if (frame == NULL) return VPX_CODEC_ERROR; - *corrupted = frame->corrupted; - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; - -} - -static vpx_codec_err_t vp8_set_decryptor(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); - - if (init) - { - ctx->decrypt_cb = init->decrypt_cb; - ctx->decrypt_state = init->decrypt_state; - } - else - { - ctx->decrypt_cb = NULL; - ctx->decrypt_state = NULL; - } - return VPX_CODEC_OK; -} - -vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = -{ - {VP8_SET_REFERENCE, vp8_set_reference}, - {VP8_COPY_REFERENCE, vp8_get_reference}, - {VP8_SET_POSTPROC, vp8_set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_color_ref_frame}, - {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_color_mb_modes}, - {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_color_b_modes}, - {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_display_mv}, - {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted}, - {VP8D_GET_LAST_REF_USED, vp8_get_last_ref_frame}, - {VPXD_SET_DECRYPTOR, vp8_set_decryptor}, - { -1, NULL}, -}; - - -#ifndef VERSION_STRING -#define VERSION_STRING -#endif -CODEC_INTERFACE(vpx_codec_vp8_dx) = -{ - "WebM Project VP8 Decoder" VERSION_STRING, - VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC | VP8_CAP_ERROR_CONCEALMENT | - VPX_CODEC_CAP_INPUT_FRAGMENTS, - /* vpx_codec_caps_t caps; */ - vp8_init, /* vpx_codec_init_fn_t init; */ - vp8_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp8_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - { - vp8_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ - vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */ - vp8_decode, /* vpx_codec_decode_fn_t decode; */ - vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ - NULL, - }, - { /* encoder functions */ - 0, - NULL, /* vpx_codec_enc_cfg_map_t */ - NULL, /* vpx_codec_encode_fn_t */ - NULL, /* vpx_codec_get_cx_data_fn_t */ - NULL, /* vpx_codec_enc_config_set_fn_t */ - NULL, /* vpx_codec_get_global_headers_fn_t */ - NULL, /* vpx_codec_get_preview_frame_fn_t */ - NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */ - } -}; diff --git a/thirdparty/libvpx/vp8_rtcd.h b/thirdparty/libvpx/vp8_rtcd.h deleted file mode 100644 index c5eeb5e579..0000000000 --- a/thirdparty/libvpx/vp8_rtcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - #include "rtcd/vp8_rtcd_x86.h" -#elif defined(WEBM_ARMASM) && ARCH_ARM - #include "rtcd/vp8_rtcd_arm.h" -#else - #include "rtcd/vp8_rtcd_c.h" -#endif diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c deleted file mode 100644 index 1761fada2f..0000000000 --- a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" -#include "vp9/common/vp9_common.h" - -static int16_t sinpi_1_9 = 0x14a3; -static int16_t sinpi_2_9 = 0x26c9; -static int16_t sinpi_3_9 = 0x3441; -static int16_t sinpi_4_9 = 0x3b6c; -static int16_t cospi_8_64 = 0x3b21; -static int16_t cospi_16_64 = 0x2d41; -static int16_t cospi_24_64 = 0x187e; - -static INLINE void TRANSPOSE4X4( - int16x8_t *q8s16, - int16x8_t *q9s16) { - int32x4_t q8s32, q9s32; - int16x4x2_t d0x2s16, d1x2s16; - int32x4x2_t q0x2s32; - - d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); - d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); - - q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); - q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); - q0x2s32 = vtrnq_s32(q8s32, q9s32); - - *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); - *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); - return; -} - -static INLINE void GENERATE_COSINE_CONSTANTS( - int16x4_t *d0s16, - int16x4_t *d1s16, - int16x4_t *d2s16) { - *d0s16 = vdup_n_s16(cospi_8_64); - *d1s16 = vdup_n_s16(cospi_16_64); - *d2s16 = vdup_n_s16(cospi_24_64); - return; -} - -static INLINE void GENERATE_SINE_CONSTANTS( - int16x4_t *d3s16, - int16x4_t *d4s16, - int16x4_t *d5s16, - int16x8_t *q3s16) { - *d3s16 = vdup_n_s16(sinpi_1_9); - *d4s16 = vdup_n_s16(sinpi_2_9); - *q3s16 = vdupq_n_s16(sinpi_3_9); - *d5s16 = vdup_n_s16(sinpi_4_9); - return; -} - -static INLINE void IDCT4x4_1D( - int16x4_t *d0s16, - int16x4_t *d1s16, - int16x4_t *d2s16, - int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; - int16x4_t d26s16, d27s16, d28s16, d29s16; - int32x4_t q10s32, q13s32, q14s32, q15s32; - int16x8_t q13s16, q14s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, *d2s16); - q10s32 = vmull_s16(d17s16, *d0s16); - q13s32 = vmull_s16(d23s16, *d1s16); - q14s32 = vmull_s16(d24s16, *d1s16); - q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); - q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q10s32, 14); - - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - *q8s16 = vaddq_s16(q13s16, q14s16); - *q9s16 = vsubq_s16(q13s16, q14s16); - *q9s16 = vcombine_s16(vget_high_s16(*q9s16), - vget_low_s16(*q9s16)); // vswp - return; -} - -static INLINE void IADST4x4_1D( - int16x4_t *d3s16, - int16x4_t *d4s16, - int16x4_t *d5s16, - int16x8_t *q3s16, - int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16; - int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d6s16 = vget_low_s16(*q3s16); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - q10s32 = vmull_s16(*d3s16, d16s16); - q11s32 = vmull_s16(*d4s16, d16s16); - q12s32 = vmull_s16(d6s16, d17s16); - q13s32 = vmull_s16(*d5s16, d18s16); - q14s32 = vmull_s16(*d3s16, d18s16); - q15s32 = vmovl_s16(d16s16); - q15s32 = vaddw_s16(q15s32, d19s16); - q8s32 = vmull_s16(*d4s16, d19s16); - q15s32 = vsubw_s16(q15s32, d18s16); - q9s32 = vmull_s16(*d5s16, d19s16); - - q10s32 = vaddq_s32(q10s32, q13s32); - q10s32 = vaddq_s32(q10s32, q8s32); - q11s32 = vsubq_s32(q11s32, q14s32); - q8s32 = vdupq_n_s32(sinpi_3_9); - q11s32 = vsubq_s32(q11s32, q9s32); - q15s32 = vmulq_s32(q15s32, q8s32); - - q13s32 = vaddq_s32(q10s32, q12s32); - q10s32 = vaddq_s32(q10s32, q11s32); - q14s32 = vaddq_s32(q11s32, q12s32); - q10s32 = vsubq_s32(q10s32, q12s32); - - d16s16 = vqrshrn_n_s32(q13s32, 14); - d17s16 = vqrshrn_n_s32(q14s32, 14); - d18s16 = vqrshrn_n_s32(q15s32, 14); - d19s16 = vqrshrn_n_s32(q10s32, 14); - - *q8s16 = vcombine_s16(d16s16, d17s16); - *q9s16 = vcombine_s16(d18s16, d19s16); - return; -} - -void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { - uint8x8_t d26u8, d27u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; - uint32x2_t d26u32, d27u32; - int16x8_t q3s16, q8s16, q9s16; - uint16x8_t q8u16, q9u16; - - d26u32 = d27u32 = vdup_n_u32(0); - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - - TRANSPOSE4X4(&q8s16, &q9s16); - - switch (tx_type) { - case 0: // idct_idct is not supported. Fall back to C - vp9_iht4x4_16_add_c(input, dest, dest_stride, tx_type); - return; - break; - case 1: // iadst_idct - // generate constants - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - break; - case 2: // idct_iadst - // generate constantsyy - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - break; - case 3: // iadst_iadst - // generate constants - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - break; - default: // iadst_idct - assert(0); - break; - } - - q8s16 = vrshrq_n_s16(q8s16, 4); - q9s16 = vrshrq_n_s16(q9s16, 4); - - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); - dest += dest_stride; - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); - dest += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); - dest += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); - - d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); - return; -} diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c deleted file mode 100644 index 04b342c3d3..0000000000 --- a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" -#include "vp9/common/vp9_common.h" - -static int16_t cospi_2_64 = 16305; -static int16_t cospi_4_64 = 16069; -static int16_t cospi_6_64 = 15679; -static int16_t cospi_8_64 = 15137; -static int16_t cospi_10_64 = 14449; -static int16_t cospi_12_64 = 13623; -static int16_t cospi_14_64 = 12665; -static int16_t cospi_16_64 = 11585; -static int16_t cospi_18_64 = 10394; -static int16_t cospi_20_64 = 9102; -static int16_t cospi_22_64 = 7723; -static int16_t cospi_24_64 = 6270; -static int16_t cospi_26_64 = 4756; -static int16_t cospi_28_64 = 3196; -static int16_t cospi_30_64 = 1606; - -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; -} - -static INLINE void IDCT8x8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d26s16, d2s16); - q6s32 = vmull_s16(d27s16, d2s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); - q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q2s32 = vmull_s16(d18s16, d1s16); - q3s32 = vmull_s16(d19s16, d1s16); - q9s32 = vmull_s16(d26s16, d3s16); - q13s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlal_s16(q2s32, d30s16, d0s16); - q3s32 = vmlal_s16(q3s32, d31s16, d0s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q13s32 = vmlal_s16(q13s32, d23s16, d2s16); - - d14s16 = vqrshrn_n_s32(q2s32, 14); - d15s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q13s32, 14); - q6s16 = vcombine_s16(d12s16, d13s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d0s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d0s16); - q3s32 = vmull_s16(d17s16, d0s16); - q13s32 = vmull_s16(d16s16, d0s16); - q15s32 = vmull_s16(d17s16, d0s16); - - q2s32 = vmlal_s16(q2s32, d24s16, d0s16); - q3s32 = vmlal_s16(q3s32, d25s16, d0s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); - q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); - - d0s16 = vdup_n_s16(cospi_24_64); - d1s16 = vdup_n_s16(cospi_8_64); - - d18s16 = vqrshrn_n_s32(q2s32, 14); - d19s16 = vqrshrn_n_s32(q3s32, 14); - d22s16 = vqrshrn_n_s32(q13s32, 14); - d23s16 = vqrshrn_n_s32(q15s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q2s32 = vmull_s16(d20s16, d0s16); - q3s32 = vmull_s16(d21s16, d0s16); - q8s32 = vmull_s16(d20s16, d1s16); - q12s32 = vmull_s16(d21s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); - q8s32 = vmlal_s16(q8s32, d28s16, d0s16); - q12s32 = vmlal_s16(q12s32, d29s16, d0s16); - - d26s16 = vqrshrn_n_s32(q2s32, 14); - d27s16 = vqrshrn_n_s32(q3s32, 14); - d30s16 = vqrshrn_n_s32(q8s32, 14); - d31s16 = vqrshrn_n_s32(q12s32, 14); - *q13s16 = vcombine_s16(d26s16, d27s16); - *q15s16 = vcombine_s16(d30s16, d31s16); - - q0s16 = vaddq_s16(*q9s16, *q15s16); - q1s16 = vaddq_s16(*q11s16, *q13s16); - q2s16 = vsubq_s16(*q11s16, *q13s16); - q3s16 = vsubq_s16(*q9s16, *q15s16); - - *q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - *q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - *q8s16 = vaddq_s16(q0s16, q7s16); - *q9s16 = vaddq_s16(q1s16, q6s16); - *q10s16 = vaddq_s16(q2s16, q5s16); - *q11s16 = vaddq_s16(q3s16, q4s16); - *q12s16 = vsubq_s16(q3s16, q4s16); - *q13s16 = vsubq_s16(q2s16, q5s16); - *q14s16 = vsubq_s16(q1s16, q6s16); - *q15s16 = vsubq_s16(q0s16, q7s16); - return; -} - -static INLINE void IADST8X8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q2s16, q4s16, q5s16, q6s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; - int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - d14s16 = vdup_n_s16(cospi_2_64); - d15s16 = vdup_n_s16(cospi_30_64); - - q1s32 = vmull_s16(d30s16, d14s16); - q2s32 = vmull_s16(d31s16, d14s16); - q3s32 = vmull_s16(d30s16, d15s16); - q4s32 = vmull_s16(d31s16, d15s16); - - d30s16 = vdup_n_s16(cospi_18_64); - d31s16 = vdup_n_s16(cospi_14_64); - - q1s32 = vmlal_s16(q1s32, d16s16, d15s16); - q2s32 = vmlal_s16(q2s32, d17s16, d15s16); - q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); - q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); - - q5s32 = vmull_s16(d22s16, d30s16); - q6s32 = vmull_s16(d23s16, d30s16); - q7s32 = vmull_s16(d22s16, d31s16); - q8s32 = vmull_s16(d23s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d24s16, d31s16); - q6s32 = vmlal_s16(q6s32, d25s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); - q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); - - q11s32 = vaddq_s32(q1s32, q5s32); - q12s32 = vaddq_s32(q2s32, q6s32); - q1s32 = vsubq_s32(q1s32, q5s32); - q2s32 = vsubq_s32(q2s32, q6s32); - - d22s16 = vqrshrn_n_s32(q11s32, 14); - d23s16 = vqrshrn_n_s32(q12s32, 14); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q12s32 = vaddq_s32(q3s32, q7s32); - q15s32 = vaddq_s32(q4s32, q8s32); - q3s32 = vsubq_s32(q3s32, q7s32); - q4s32 = vsubq_s32(q4s32, q8s32); - - d2s16 = vqrshrn_n_s32(q1s32, 14); - d3s16 = vqrshrn_n_s32(q2s32, 14); - d24s16 = vqrshrn_n_s32(q12s32, 14); - d25s16 = vqrshrn_n_s32(q15s32, 14); - d6s16 = vqrshrn_n_s32(q3s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - *q12s16 = vcombine_s16(d24s16, d25s16); - - d0s16 = vdup_n_s16(cospi_10_64); - d1s16 = vdup_n_s16(cospi_22_64); - q4s32 = vmull_s16(d26s16, d0s16); - q5s32 = vmull_s16(d27s16, d0s16); - q2s32 = vmull_s16(d26s16, d1s16); - q6s32 = vmull_s16(d27s16, d1s16); - - d30s16 = vdup_n_s16(cospi_26_64); - d31s16 = vdup_n_s16(cospi_6_64); - - q4s32 = vmlal_s16(q4s32, d20s16, d1s16); - q5s32 = vmlal_s16(q5s32, d21s16, d1s16); - q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); - q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); - - q0s32 = vmull_s16(d18s16, d30s16); - q13s32 = vmull_s16(d19s16, d30s16); - - q0s32 = vmlal_s16(q0s32, d28s16, d31s16); - q13s32 = vmlal_s16(q13s32, d29s16, d31s16); - - q10s32 = vmull_s16(d18s16, d31s16); - q9s32 = vmull_s16(d19s16, d31s16); - - q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); - q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); - - q14s32 = vaddq_s32(q2s32, q10s32); - q15s32 = vaddq_s32(q6s32, q9s32); - q2s32 = vsubq_s32(q2s32, q10s32); - q6s32 = vsubq_s32(q6s32, q9s32); - - d28s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d4s16 = vqrshrn_n_s32(q2s32, 14); - d5s16 = vqrshrn_n_s32(q6s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - q9s32 = vaddq_s32(q4s32, q0s32); - q10s32 = vaddq_s32(q5s32, q13s32); - q4s32 = vsubq_s32(q4s32, q0s32); - q5s32 = vsubq_s32(q5s32, q13s32); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - d18s16 = vqrshrn_n_s32(q9s32, 14); - d19s16 = vqrshrn_n_s32(q10s32, 14); - d8s16 = vqrshrn_n_s32(q4s32, 14); - d9s16 = vqrshrn_n_s32(q5s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q5s32 = vmull_s16(d2s16, d30s16); - q6s32 = vmull_s16(d3s16, d30s16); - q7s32 = vmull_s16(d2s16, d31s16); - q0s32 = vmull_s16(d3s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d6s16, d31s16); - q6s32 = vmlal_s16(q6s32, d7s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); - - q1s32 = vmull_s16(d4s16, d30s16); - q3s32 = vmull_s16(d5s16, d30s16); - q10s32 = vmull_s16(d4s16, d31s16); - q2s32 = vmull_s16(d5s16, d31s16); - - q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); - q3s32 = vmlsl_s16(q3s32, d9s16, d31s16); - q10s32 = vmlal_s16(q10s32, d8s16, d30s16); - q2s32 = vmlal_s16(q2s32, d9s16, d30s16); - - *q8s16 = vaddq_s16(*q11s16, *q9s16); - *q11s16 = vsubq_s16(*q11s16, *q9s16); - q4s16 = vaddq_s16(*q12s16, *q14s16); - *q12s16 = vsubq_s16(*q12s16, *q14s16); - - q14s32 = vaddq_s32(q5s32, q1s32); - q15s32 = vaddq_s32(q6s32, q3s32); - q5s32 = vsubq_s32(q5s32, q1s32); - q6s32 = vsubq_s32(q6s32, q3s32); - - d18s16 = vqrshrn_n_s32(q14s32, 14); - d19s16 = vqrshrn_n_s32(q15s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q1s32 = vaddq_s32(q7s32, q10s32); - q3s32 = vaddq_s32(q0s32, q2s32); - q7s32 = vsubq_s32(q7s32, q10s32); - q0s32 = vsubq_s32(q0s32, q2s32); - - d28s16 = vqrshrn_n_s32(q1s32, 14); - d29s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q7s32, 14); - d15s16 = vqrshrn_n_s32(q0s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - d30s16 = vdup_n_s16(cospi_16_64); - - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - q2s32 = vmull_s16(d22s16, d30s16); - q3s32 = vmull_s16(d23s16, d30s16); - q13s32 = vmull_s16(d22s16, d30s16); - q1s32 = vmull_s16(d23s16, d30s16); - - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - q2s32 = vmlal_s16(q2s32, d24s16, d30s16); - q3s32 = vmlal_s16(q3s32, d25s16, d30s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); - q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); - - d4s16 = vqrshrn_n_s32(q2s32, 14); - d5s16 = vqrshrn_n_s32(q3s32, 14); - d24s16 = vqrshrn_n_s32(q13s32, 14); - d25s16 = vqrshrn_n_s32(q1s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - *q12s16 = vcombine_s16(d24s16, d25s16); - - q13s32 = vmull_s16(d10s16, d30s16); - q1s32 = vmull_s16(d11s16, d30s16); - q11s32 = vmull_s16(d10s16, d30s16); - q0s32 = vmull_s16(d11s16, d30s16); - - q13s32 = vmlal_s16(q13s32, d14s16, d30s16); - q1s32 = vmlal_s16(q1s32, d15s16, d30s16); - q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); - - d20s16 = vqrshrn_n_s32(q13s32, 14); - d21s16 = vqrshrn_n_s32(q1s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q0s32, 14); - *q10s16 = vcombine_s16(d20s16, d21s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - q5s16 = vdupq_n_s16(0); - - *q9s16 = vsubq_s16(q5s16, *q9s16); - *q11s16 = vsubq_s16(q5s16, q2s16); - *q13s16 = vsubq_s16(q5s16, q6s16); - *q15s16 = vsubq_s16(q5s16, q4s16); - return; -} - -void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { - int i; - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 8 * 2); - q11s16 = vld1q_s16(input + 8 * 3); - q12s16 = vld1q_s16(input + 8 * 4); - q13s16 = vld1q_s16(input + 8 * 5); - q14s16 = vld1q_s16(input + 8 * 6); - q15s16 = vld1q_s16(input + 8 * 7); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - switch (tx_type) { - case 0: // idct_idct is not supported. Fall back to C - vp9_iht8x8_64_add_c(input, dest, dest_stride, tx_type); - return; - break; - case 1: // iadst_idct - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // first transform rows - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - case 2: // idct_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // then transform columns - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - case 3: // iadst_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - default: // iadst_idct - assert(0); - break; - } - - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - for (d1 = d2 = dest, i = 0; i < 2; i++) { - if (i != 0) { - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - } - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - } - return; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.c b/thirdparty/libvpx/vp9/common/vp9_alloccommon.c deleted file mode 100644 index 7dd1005d3f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_alloccommon.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_onyxc_int.h" - -// TODO(hkuang): Don't need to lock the whole pool after implementing atomic -// frame reference count. -void lock_buffer_pool(BufferPool *const pool) { -#if CONFIG_MULTITHREAD - pthread_mutex_lock(&pool->pool_mutex); -#else - (void)pool; -#endif -} - -void unlock_buffer_pool(BufferPool *const pool) { -#if CONFIG_MULTITHREAD - pthread_mutex_unlock(&pool->pool_mutex); -#else - (void)pool; -#endif -} - -void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { - const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); - const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); - - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mi_stride = calc_mi_size(cm->mi_cols); - - cm->mb_cols = (cm->mi_cols + 1) >> 1; - cm->mb_rows = (cm->mi_rows + 1) >> 1; - cm->MBs = cm->mb_rows * cm->mb_cols; -} - -static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { - int i; - - for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { - cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); - if (cm->seg_map_array[i] == NULL) - return 1; - } - cm->seg_map_alloc_size = seg_map_size; - - // Init the index. - cm->seg_map_idx = 0; - cm->prev_seg_map_idx = 1; - - cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; - if (!cm->frame_parallel_decode) - cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; - - return 0; -} - -static void free_seg_map(VP9_COMMON *cm) { - int i; - - for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { - vpx_free(cm->seg_map_array[i]); - cm->seg_map_array[i] = NULL; - } - - cm->current_frame_seg_map = NULL; - - if (!cm->frame_parallel_decode) { - cm->last_frame_seg_map = NULL; - } -} - -void vp9_free_ref_frame_buffers(BufferPool *pool) { - int i; - - for (i = 0; i < FRAME_BUFFERS; ++i) { - if (pool->frame_bufs[i].ref_count > 0 && - pool->frame_bufs[i].raw_frame_buffer.data != NULL) { - pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); - pool->frame_bufs[i].ref_count = 0; - } - vpx_free(pool->frame_bufs[i].mvs); - pool->frame_bufs[i].mvs = NULL; - vpx_free_frame_buffer(&pool->frame_bufs[i].buf); - } -} - -void vp9_free_postproc_buffers(VP9_COMMON *cm) { -#if CONFIG_VP9_POSTPROC - vpx_free_frame_buffer(&cm->post_proc_buffer); - vpx_free_frame_buffer(&cm->post_proc_buffer_int); -#else - (void)cm; -#endif -} - -void vp9_free_context_buffers(VP9_COMMON *cm) { - cm->free_mi(cm); - free_seg_map(cm); - vpx_free(cm->above_context); - cm->above_context = NULL; - vpx_free(cm->above_seg_context); - cm->above_seg_context = NULL; - vpx_free(cm->lf.lfm); - cm->lf.lfm = NULL; -} - - -int vp9_alloc_loop_filter(VP9_COMMON *cm) { - vpx_free(cm->lf.lfm); - // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The - // stride and rows are rounded up / truncated to a multiple of 8. - cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; - cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( - ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, - sizeof(*cm->lf.lfm)); - if (!cm->lf.lfm) - return 1; - return 0; -} - -int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { - int new_mi_size; - - vp9_set_mb_mi(cm, width, height); - new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); - if (cm->mi_alloc_size < new_mi_size) { - cm->free_mi(cm); - if (cm->alloc_mi(cm, new_mi_size)) - goto fail; - } - - if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { - // Create the segmentation map structure and set to 0. - free_seg_map(cm); - if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) - goto fail; - } - - if (cm->above_context_alloc_cols < cm->mi_cols) { - vpx_free(cm->above_context); - cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( - 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, - sizeof(*cm->above_context)); - if (!cm->above_context) goto fail; - - vpx_free(cm->above_seg_context); - cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( - mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); - if (!cm->above_seg_context) goto fail; - cm->above_context_alloc_cols = cm->mi_cols; - } - - if (vp9_alloc_loop_filter(cm)) - goto fail; - - return 0; - - fail: - vp9_free_context_buffers(cm); - return 1; -} - -void vp9_remove_common(VP9_COMMON *cm) { - vp9_free_context_buffers(cm); - - vpx_free(cm->fc); - cm->fc = NULL; - vpx_free(cm->frame_contexts); - cm->frame_contexts = NULL; -} - -void vp9_init_context_buffers(VP9_COMMON *cm) { - cm->setup_mi(cm); - if (cm->last_frame_seg_map && !cm->frame_parallel_decode) - memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); -} - -void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { - // Swap indices. - const int tmp = cm->seg_map_idx; - cm->seg_map_idx = cm->prev_seg_map_idx; - cm->prev_seg_map_idx = tmp; - - cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; - cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.h b/thirdparty/libvpx/vp9/common/vp9_alloccommon.h deleted file mode 100644 index e53955b998..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_alloccommon.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ -#define VP9_COMMON_VP9_ALLOCCOMMON_H_ - -#define INVALID_IDX -1 // Invalid buffer index. - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct BufferPool; - -void vp9_remove_common(struct VP9Common *cm); - -int vp9_alloc_loop_filter(struct VP9Common *cm); -int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); -void vp9_init_context_buffers(struct VP9Common *cm); -void vp9_free_context_buffers(struct VP9Common *cm); - -void vp9_free_ref_frame_buffers(struct BufferPool *pool); -void vp9_free_postproc_buffers(struct VP9Common *cm); - -int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); -void vp9_free_state_buffers(struct VP9Common *cm); - -void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); - -void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.c b/thirdparty/libvpx/vp9/common/vp9_blockd.c deleted file mode 100644 index 7bab27d4fd..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_blockd.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_blockd.h" - -PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b) { - if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(left_mi)) - return DC_PRED; - - return get_y_mode(left_mi, b + 1); - } else { - assert(b == 1 || b == 3); - return cur_mi->bmi[b - 1].as_mode; - } -} - -PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b) { - if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(above_mi)) - return DC_PRED; - - return get_y_mode(above_mi, b + 2); - } else { - assert(b == 2 || b == 3); - return cur_mi->bmi[b - 2].as_mode; - } -} - -void vp9_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO* mi = xd->mi[0]; - // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") - // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) - : mi->tx_size; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const int step = 1 << (tx_size << 1); - int i = 0, r, c; - - // If mb_to_right_edge is < 0 we are in a situation in which - // the current block size extends into the UMV and we won't - // visit the sub blocks that are wholly within the UMV. - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : - xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : - xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step; - - // Keep track of the row and column of the blocks we use so that we know - // if we are in the unrestricted motion border. - for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { - // Skip visiting the sub blocks that are wholly within the UMV. - for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { - visit(plane, i, plane_bsize, tx_size, arg); - i += step; - } - i += extra_step; - } -} - -void vp9_foreach_transformed_block(const MACROBLOCKD* const xd, - BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, - void *arg) { - int plane; - - for (plane = 0; plane < MAX_MB_PLANE; ++plane) - vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); -} - -void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, - int aoff, int loff) { - ENTROPY_CONTEXT *const a = pd->above_context + aoff; - ENTROPY_CONTEXT *const l = pd->left_context + loff; - const int tx_size_in_blocks = 1 << tx_size; - - // above - if (has_eob && xd->mb_to_right_edge < 0) { - int i; - const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] + - (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - int above_contexts = tx_size_in_blocks; - if (above_contexts + aoff > blocks_wide) - above_contexts = blocks_wide - aoff; - - for (i = 0; i < above_contexts; ++i) - a[i] = has_eob; - for (i = above_contexts; i < tx_size_in_blocks; ++i) - a[i] = 0; - } else { - memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); - } - - // left - if (has_eob && xd->mb_to_bottom_edge < 0) { - int i; - const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + - (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - int left_contexts = tx_size_in_blocks; - if (left_contexts + loff > blocks_high) - left_contexts = blocks_high - loff; - - for (i = 0; i < left_contexts; ++i) - l[i] = has_eob; - for (i = left_contexts; i < tx_size_in_blocks; ++i) - l[i] = 0; - } else { - memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); - } -} - -void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].subsampling_x = i ? ss_x : 0; - xd->plane[i].subsampling_y = i ? ss_y : 0; - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.h b/thirdparty/libvpx/vp9/common/vp9_blockd.h deleted file mode 100644 index 3d26fb2b5d..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_blockd.h +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_VP9_BLOCKD_H_ -#define VP9_COMMON_VP9_BLOCKD_H_ - -#include "./vpx_config.h" - -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" -#include "vpx_scale/yv12config.h" - -#include "vp9/common/vp9_common_data.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_mv.h" -#include "vp9/common/vp9_scale.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_tile_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_MB_PLANE 3 - -typedef enum { - KEY_FRAME = 0, - INTER_FRAME = 1, - FRAME_TYPES, -} FRAME_TYPE; - -static INLINE int is_inter_mode(PREDICTION_MODE mode) { - return mode >= NEARESTMV && mode <= NEWMV; -} - -/* For keyframes, intra block modes are predicted by the (already decoded) - modes for the Y blocks to the left and above us; for interframes, there - is a single probability table. */ - -typedef struct { - PREDICTION_MODE as_mode; - int_mv as_mv[2]; // first, second inter predictor motion vectors -} b_mode_info; - -// Note that the rate-distortion optimization loop, bit-stream writer, and -// decoder implementation modules critically rely on the defined entry values -// specified herein. They should be refactored concurrently. - -#define NONE -1 -#define INTRA_FRAME 0 -#define LAST_FRAME 1 -#define GOLDEN_FRAME 2 -#define ALTREF_FRAME 3 -#define MAX_REF_FRAMES 4 -typedef int8_t MV_REFERENCE_FRAME; - -// This structure now relates to 8x8 block regions. -typedef struct MODE_INFO { - // Common for both INTER and INTRA blocks - BLOCK_SIZE sb_type; - PREDICTION_MODE mode; - TX_SIZE tx_size; - int8_t skip; - int8_t segment_id; - int8_t seg_id_predicted; // valid only when temporal_update is enabled - - // Only for INTRA blocks - PREDICTION_MODE uv_mode; - - // Only for INTER blocks - INTERP_FILTER interp_filter; - MV_REFERENCE_FRAME ref_frame[2]; - - // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. - int_mv mv[2]; - - b_mode_info bmi[4]; -} MODE_INFO; - -static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { - return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode - : mi->mode; -} - -static INLINE int is_inter_block(const MODE_INFO *mi) { - return mi->ref_frame[0] > INTRA_FRAME; -} - -static INLINE int has_second_ref(const MODE_INFO *mi) { - return mi->ref_frame[1] > INTRA_FRAME; -} - -PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b); - -PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b); - -enum mv_precision { - MV_PRECISION_Q3, - MV_PRECISION_Q4 -}; - -struct buf_2d { - uint8_t *buf; - int stride; -}; - -struct macroblockd_plane { - tran_low_t *dqcoeff; - int subsampling_x; - int subsampling_y; - struct buf_2d dst; - struct buf_2d pre[2]; - ENTROPY_CONTEXT *above_context; - ENTROPY_CONTEXT *left_context; - int16_t seg_dequant[MAX_SEGMENTS][2]; - - // number of 4x4s in current block - uint16_t n4_w, n4_h; - // log2 of n4_w, n4_h - uint8_t n4_wl, n4_hl; - - // encoder - const int16_t *dequant; -}; - -#define BLOCK_OFFSET(x, i) ((x) + (i) * 16) - -typedef struct RefBuffer { - // TODO(dkovalev): idx is not really required and should be removed, now it - // is used in vp9_onyxd_if.c - int idx; - YV12_BUFFER_CONFIG *buf; - struct scale_factors sf; -} RefBuffer; - -typedef struct macroblockd { - struct macroblockd_plane plane[MAX_MB_PLANE]; - uint8_t bmode_blocks_wl; - uint8_t bmode_blocks_hl; - - FRAME_COUNTS *counts; - TileInfo tile; - - int mi_stride; - - MODE_INFO **mi; - MODE_INFO *left_mi; - MODE_INFO *above_mi; - - unsigned int max_blocks_wide; - unsigned int max_blocks_high; - - const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; - - /* Distance of MB away from frame edges */ - int mb_to_left_edge; - int mb_to_right_edge; - int mb_to_top_edge; - int mb_to_bottom_edge; - - FRAME_CONTEXT *fc; - - /* pointers to reference frames */ - RefBuffer *block_refs[2]; - - /* pointer to current frame */ - const YV12_BUFFER_CONFIG *cur_buf; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; - - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; - -#if CONFIG_VP9_HIGHBITDEPTH - /* Bit depth: 8, 10, 12 */ - int bd; -#endif - - int lossless; - int corrupted; - - struct vpx_internal_error_info *error_info; -} MACROBLOCKD; - -static INLINE PLANE_TYPE get_plane_type(int plane) { - return (PLANE_TYPE)(plane > 0); -} - -static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, - PARTITION_TYPE partition) { - return subsize_lookup[partition][bsize]; -} - -extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; - -static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, - const MACROBLOCKD *xd) { - const MODE_INFO *const mi = xd->mi[0]; - - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) - return DCT_DCT; - - return intra_mode_to_tx_type_lookup[mi->mode]; -} - -static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, - const MACROBLOCKD *xd, int ib) { - const MODE_INFO *const mi = xd->mi[0]; - - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) - return DCT_DCT; - - return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; -} - -void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); - -static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, - int xss, int yss) { - if (bsize < BLOCK_8X8) { - return TX_4X4; - } else { - const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss]; - return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]); - } -} - -static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi, - const struct macroblockd_plane *pd) { - return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x, - pd->subsampling_y); -} - -static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd) { - return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; -} - -static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - memset(pd->above_context, 0, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); - memset(pd->left_context, 0, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); - } -} - -static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, - const MODE_INFO *above_mi, - const MODE_INFO *left_mi, - int block) { - const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); - const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); - return vp9_kf_y_mode_prob[above][left]; -} - -typedef void (*foreach_transformed_block_visitor)(int plane, int block, - BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, - void *arg); - -void vp9_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg); - - -void vp9_foreach_transformed_block( - const MACROBLOCKD* const xd, BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, void *arg); - -static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, int block, - int *x, int *y) { - const int bwl = b_width_log2_lookup[plane_bsize]; - const int tx_cols_log2 = bwl - tx_size; - const int tx_cols = 1 << tx_cols_log2; - const int raster_mb = block >> (tx_size << 1); - *x = (raster_mb & (tx_cols - 1)) << tx_size; - *y = (raster_mb >> tx_cols_log2) << tx_size; -} - -void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, - int aoff, int loff); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common.h b/thirdparty/libvpx/vp9/common/vp9_common.h deleted file mode 100644 index 908fa80a31..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_common.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_COMMON_H_ -#define VP9_COMMON_VP9_COMMON_H_ - -/* Interface header for common constant data structures and lookup tables */ - -#include - -#include "./vpx_config.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/bitops.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Only need this for fixed-size arrays, for structs just assign. -#define vp9_copy(dest, src) { \ - assert(sizeof(dest) == sizeof(src)); \ - memcpy(dest, src, sizeof(src)); \ - } - -// Use this for variably-sized arrays. -#define vp9_copy_array(dest, src, n) { \ - assert(sizeof(*dest) == sizeof(*src)); \ - memcpy(dest, src, n * sizeof(*src)); \ - } - -#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) -#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest)) - -static INLINE int get_unsigned_bits(unsigned int num_values) { - return num_values > 0 ? get_msb(num_values) + 1 : 0; -} - -#if CONFIG_DEBUG -#define CHECK_MEM_ERROR(cm, lval, expr) do { \ - lval = (expr); \ - if (!lval) \ - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ - "Failed to allocate "#lval" at %s:%d", \ - __FILE__, __LINE__); \ - } while (0) -#else -#define CHECK_MEM_ERROR(cm, lval, expr) do { \ - lval = (expr); \ - if (!lval) \ - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ - "Failed to allocate "#lval); \ - } while (0) -#endif - -#define VP9_SYNC_CODE_0 0x49 -#define VP9_SYNC_CODE_1 0x83 -#define VP9_SYNC_CODE_2 0x42 - -#define VP9_FRAME_MARKER 0x2 - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.c b/thirdparty/libvpx/vp9/common/vp9_common_data.c deleted file mode 100644 index 3409d04844..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_common_data.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_common_data.h" -#include "vpx_dsp/vpx_dsp_common.h" - -// Log 2 conversion lookup tables for block width and height -const uint8_t b_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; -const uint8_t b_height_log2_lookup[BLOCK_SIZES] = - {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; -const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16}; -const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = - {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16}; -// Log 2 conversion lookup tables for modeinfo width and height -const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; -const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; -const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; - -// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) -const uint8_t size_group_lookup[BLOCK_SIZES] = - {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; - -const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = - {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; - -const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { - { // 4X4 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID - }, { // 8X8 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID - }, { // 16X16 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID - }, { // 32X32 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, - PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID - }, { // 64X64 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, - PARTITION_NONE - } -}; - -const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { - { // PARTITION_NONE - BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, - BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, - BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, - BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, - BLOCK_64X64, - }, { // PARTITION_HORZ - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_64X32, - }, { // PARTITION_VERT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X64, - }, { // PARTITION_SPLIT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X32, - } -}; - -const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { - TX_4X4, TX_4X4, TX_4X4, - TX_8X8, TX_8X8, TX_8X8, - TX_16X16, TX_16X16, TX_16X16, - TX_32X32, TX_32X32, TX_32X32, TX_32X32 -}; - -const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { - BLOCK_4X4, // TX_4X4 - BLOCK_8X8, // TX_8X8 - BLOCK_16X16, // TX_16X16 - BLOCK_32X32, // TX_32X32 -}; - -const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { - TX_4X4, // ONLY_4X4 - TX_8X8, // ALLOW_8X8 - TX_16X16, // ALLOW_16X16 - TX_32X32, // ALLOW_32X32 - TX_32X32, // TX_MODE_SELECT -}; - -const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { -// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 -// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 - {{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}}, - {{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}}, - {{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}}, - {{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}}, - {{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}}, - {{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}}, - {{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}}, - {{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}}, - {{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}}, - {{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}}, - {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, - {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, - {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, -}; - -// Generates 4 bit field in which each bit set to 1 represents -// a blocksize partition 1111 means we split 64x64, 32x32, 16x16 -// and 8x8. 1000 means we just split the 64x64 to 32x32 -const struct { - PARTITION_CONTEXT above; - PARTITION_CONTEXT left; -} partition_context_lookup[BLOCK_SIZES]= { - {15, 15}, // 4X4 - {0b1111, 0b1111} - {15, 14}, // 4X8 - {0b1111, 0b1110} - {14, 15}, // 8X4 - {0b1110, 0b1111} - {14, 14}, // 8X8 - {0b1110, 0b1110} - {14, 12}, // 8X16 - {0b1110, 0b1100} - {12, 14}, // 16X8 - {0b1100, 0b1110} - {12, 12}, // 16X16 - {0b1100, 0b1100} - {12, 8 }, // 16X32 - {0b1100, 0b1000} - {8, 12}, // 32X16 - {0b1000, 0b1100} - {8, 8 }, // 32X32 - {0b1000, 0b1000} - {8, 0 }, // 32X64 - {0b1000, 0b0000} - {0, 8 }, // 64X32 - {0b0000, 0b1000} - {0, 0 }, // 64X64 - {0b0000, 0b0000} -}; - -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH -const uint8_t need_top_left[INTRA_MODES] = { - 0, // DC_PRED - 0, // V_PRED - 0, // H_PRED - 0, // D45_PRED - 1, // D135_PRED - 1, // D117_PRED - 1, // D153_PRED - 0, // D207_PRED - 0, // D63_PRED - 1, // TM_PRED -}; -#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.h b/thirdparty/libvpx/vp9/common/vp9_common_data.h deleted file mode 100644 index 0ae24dad54..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_common_data.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_COMMON_DATA_H_ -#define VP9_COMMON_VP9_COMMON_DATA_H_ - -#include "vp9/common/vp9_enums.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern const uint8_t b_width_log2_lookup[BLOCK_SIZES]; -extern const uint8_t b_height_log2_lookup[BLOCK_SIZES]; -extern const uint8_t mi_width_log2_lookup[BLOCK_SIZES]; -extern const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES]; -extern const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES]; -extern const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES]; -extern const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES]; -extern const uint8_t size_group_lookup[BLOCK_SIZES]; -extern const uint8_t num_pels_log2_lookup[BLOCK_SIZES]; -extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES]; -extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES]; -extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; -extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; -extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; -extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH -extern const uint8_t need_top_left[INTRA_MODES]; -#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_debugmodes.c b/thirdparty/libvpx/vp9/common/vp9_debugmodes.c deleted file mode 100644 index d9c1fd9686..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_debugmodes.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" - -static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { - fprintf(f, "%s", str); - fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame, - cm->show_frame, cm->base_qindex); -} -/* This function dereferences a pointer to the mbmi structure - * and uses the passed in member offset to print out the value of an integer - * for each mbmi member value in the mi structure. - */ -static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, - size_t member_offset) { - int mi_row, mi_col; - MODE_INFO **mi = cm->mi_grid_visible; - int rows = cm->mi_rows; - int cols = cm->mi_cols; - char prefix = descriptor[0]; - - log_frame_info(cm, descriptor, file); - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(file, "%c ", prefix); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(file, "%2d ", - *((int*) ((char *) (mi[0]) + - member_offset))); - mi++; - } - fprintf(file, "\n"); - mi += 8; - } - fprintf(file, "\n"); -} - -void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { - int mi_row; - int mi_col; - FILE *mvs = fopen(file, "a"); - MODE_INFO **mi = cm->mi_grid_visible; - int rows = cm->mi_rows; - int cols = cm->mi_cols; - - print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); - print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); - print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); - print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); - print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); - - // output skip infomation. - log_frame_info(cm, "Skips:", mvs); - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(mvs, "S "); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%2d ", mi[0]->skip); - mi++; - } - fprintf(mvs, "\n"); - mi += 8; - } - fprintf(mvs, "\n"); - - // output motion vectors. - log_frame_info(cm, "Vectors ", mvs); - mi = cm->mi_grid_visible; - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(mvs, "V "); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, - mi[0]->mv[0].as_mv.col); - mi++; - } - fprintf(mvs, "\n"); - mi += 8; - } - fprintf(mvs, "\n"); - - fclose(mvs); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.c b/thirdparty/libvpx/vp9/common/vp9_entropy.c deleted file mode 100644 index 7b490af34f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropy.c +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_entropymode.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx/vpx_integer.h" - -// Unconstrained Node Tree -const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { - 2, 6, // 0 = LOW_VAL - -TWO_TOKEN, 4, // 1 = TWO - -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE - 8, 10, // 3 = HIGH_LOW - -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE - 12, 14, // 5 = CAT_THREEFOUR - -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE - -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE -}; - -const vpx_prob vp9_cat1_prob[] = { 159 }; -const vpx_prob vp9_cat2_prob[] = { 165, 145 }; -const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 }; -const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 }; -const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 }; -const vpx_prob vp9_cat6_prob[] = { - 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 -}; -#if CONFIG_VP9_HIGHBITDEPTH -const vpx_prob vp9_cat6_prob_high12[] = { - 255, 255, 255, 255, 254, 254, 254, 252, 249, - 243, 230, 196, 177, 153, 140, 133, 130, 129 -}; -#endif - -const uint8_t vp9_coefband_trans_8x8plus[1024] = { - 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, - // beyond MAXBAND_INDEX+1 all values are filled as 5 - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -}; - -const uint8_t vp9_coefband_trans_4x4[16] = { - 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, -}; - -const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { - 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 -}; - -// Model obtained from a 2-sided zero-centerd distribuition derived -// from a Pareto distribution. The cdf of the distribution is: -// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] -// -// For a given beta and a given probablity of the 1-node, the alpha -// is first solved, and then the {alpha, beta} pair is used to generate -// the probabilities for the rest of the nodes. - -// beta = 8 - -// Every odd line in this table can be generated from the even lines -// by averaging : -// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + -// vp9_pareto8_full[l+1][node] ) >> 1; -const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { - { 3, 86, 128, 6, 86, 23, 88, 29}, - { 6, 86, 128, 11, 87, 42, 91, 52}, - { 9, 86, 129, 17, 88, 61, 94, 76}, - { 12, 86, 129, 22, 88, 77, 97, 93}, - { 15, 87, 129, 28, 89, 93, 100, 110}, - { 17, 87, 129, 33, 90, 105, 103, 123}, - { 20, 88, 130, 38, 91, 118, 106, 136}, - { 23, 88, 130, 43, 91, 128, 108, 146}, - { 26, 89, 131, 48, 92, 139, 111, 156}, - { 28, 89, 131, 53, 93, 147, 114, 163}, - { 31, 90, 131, 58, 94, 156, 117, 171}, - { 34, 90, 131, 62, 94, 163, 119, 177}, - { 37, 90, 132, 66, 95, 171, 122, 184}, - { 39, 90, 132, 70, 96, 177, 124, 189}, - { 42, 91, 132, 75, 97, 183, 127, 194}, - { 44, 91, 132, 79, 97, 188, 129, 198}, - { 47, 92, 133, 83, 98, 193, 132, 202}, - { 49, 92, 133, 86, 99, 197, 134, 205}, - { 52, 93, 133, 90, 100, 201, 137, 208}, - { 54, 93, 133, 94, 100, 204, 139, 211}, - { 57, 94, 134, 98, 101, 208, 142, 214}, - { 59, 94, 134, 101, 102, 211, 144, 216}, - { 62, 94, 135, 105, 103, 214, 146, 218}, - { 64, 94, 135, 108, 103, 216, 148, 220}, - { 66, 95, 135, 111, 104, 219, 151, 222}, - { 68, 95, 135, 114, 105, 221, 153, 223}, - { 71, 96, 136, 117, 106, 224, 155, 225}, - { 73, 96, 136, 120, 106, 225, 157, 226}, - { 76, 97, 136, 123, 107, 227, 159, 228}, - { 78, 97, 136, 126, 108, 229, 160, 229}, - { 80, 98, 137, 129, 109, 231, 162, 231}, - { 82, 98, 137, 131, 109, 232, 164, 232}, - { 84, 98, 138, 134, 110, 234, 166, 233}, - { 86, 98, 138, 137, 111, 235, 168, 234}, - { 89, 99, 138, 140, 112, 236, 170, 235}, - { 91, 99, 138, 142, 112, 237, 171, 235}, - { 93, 100, 139, 145, 113, 238, 173, 236}, - { 95, 100, 139, 147, 114, 239, 174, 237}, - { 97, 101, 140, 149, 115, 240, 176, 238}, - { 99, 101, 140, 151, 115, 241, 177, 238}, - {101, 102, 140, 154, 116, 242, 179, 239}, - {103, 102, 140, 156, 117, 242, 180, 239}, - {105, 103, 141, 158, 118, 243, 182, 240}, - {107, 103, 141, 160, 118, 243, 183, 240}, - {109, 104, 141, 162, 119, 244, 185, 241}, - {111, 104, 141, 164, 119, 244, 186, 241}, - {113, 104, 142, 166, 120, 245, 187, 242}, - {114, 104, 142, 168, 121, 245, 188, 242}, - {116, 105, 143, 170, 122, 246, 190, 243}, - {118, 105, 143, 171, 122, 246, 191, 243}, - {120, 106, 143, 173, 123, 247, 192, 244}, - {121, 106, 143, 175, 124, 247, 193, 244}, - {123, 107, 144, 177, 125, 248, 195, 244}, - {125, 107, 144, 178, 125, 248, 196, 244}, - {127, 108, 145, 180, 126, 249, 197, 245}, - {128, 108, 145, 181, 127, 249, 198, 245}, - {130, 109, 145, 183, 128, 249, 199, 245}, - {132, 109, 145, 184, 128, 249, 200, 245}, - {134, 110, 146, 186, 129, 250, 201, 246}, - {135, 110, 146, 187, 130, 250, 202, 246}, - {137, 111, 147, 189, 131, 251, 203, 246}, - {138, 111, 147, 190, 131, 251, 204, 246}, - {140, 112, 147, 192, 132, 251, 205, 247}, - {141, 112, 147, 193, 132, 251, 206, 247}, - {143, 113, 148, 194, 133, 251, 207, 247}, - {144, 113, 148, 195, 134, 251, 207, 247}, - {146, 114, 149, 197, 135, 252, 208, 248}, - {147, 114, 149, 198, 135, 252, 209, 248}, - {149, 115, 149, 199, 136, 252, 210, 248}, - {150, 115, 149, 200, 137, 252, 210, 248}, - {152, 115, 150, 201, 138, 252, 211, 248}, - {153, 115, 150, 202, 138, 252, 212, 248}, - {155, 116, 151, 204, 139, 253, 213, 249}, - {156, 116, 151, 205, 139, 253, 213, 249}, - {158, 117, 151, 206, 140, 253, 214, 249}, - {159, 117, 151, 207, 141, 253, 215, 249}, - {161, 118, 152, 208, 142, 253, 216, 249}, - {162, 118, 152, 209, 142, 253, 216, 249}, - {163, 119, 153, 210, 143, 253, 217, 249}, - {164, 119, 153, 211, 143, 253, 217, 249}, - {166, 120, 153, 212, 144, 254, 218, 250}, - {167, 120, 153, 212, 145, 254, 219, 250}, - {168, 121, 154, 213, 146, 254, 220, 250}, - {169, 121, 154, 214, 146, 254, 220, 250}, - {171, 122, 155, 215, 147, 254, 221, 250}, - {172, 122, 155, 216, 147, 254, 221, 250}, - {173, 123, 155, 217, 148, 254, 222, 250}, - {174, 123, 155, 217, 149, 254, 222, 250}, - {176, 124, 156, 218, 150, 254, 223, 250}, - {177, 124, 156, 219, 150, 254, 223, 250}, - {178, 125, 157, 220, 151, 254, 224, 251}, - {179, 125, 157, 220, 151, 254, 224, 251}, - {180, 126, 157, 221, 152, 254, 225, 251}, - {181, 126, 157, 221, 152, 254, 225, 251}, - {183, 127, 158, 222, 153, 254, 226, 251}, - {184, 127, 158, 223, 154, 254, 226, 251}, - {185, 128, 159, 224, 155, 255, 227, 251}, - {186, 128, 159, 224, 155, 255, 227, 251}, - {187, 129, 160, 225, 156, 255, 228, 251}, - {188, 130, 160, 225, 156, 255, 228, 251}, - {189, 131, 160, 226, 157, 255, 228, 251}, - {190, 131, 160, 226, 158, 255, 228, 251}, - {191, 132, 161, 227, 159, 255, 229, 251}, - {192, 132, 161, 227, 159, 255, 229, 251}, - {193, 133, 162, 228, 160, 255, 230, 252}, - {194, 133, 162, 229, 160, 255, 230, 252}, - {195, 134, 163, 230, 161, 255, 231, 252}, - {196, 134, 163, 230, 161, 255, 231, 252}, - {197, 135, 163, 231, 162, 255, 231, 252}, - {198, 135, 163, 231, 162, 255, 231, 252}, - {199, 136, 164, 232, 163, 255, 232, 252}, - {200, 136, 164, 232, 164, 255, 232, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {202, 138, 166, 233, 166, 255, 233, 252}, - {203, 138, 166, 233, 166, 255, 233, 252}, - {204, 139, 166, 234, 167, 255, 234, 252}, - {205, 139, 166, 234, 167, 255, 234, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {207, 141, 168, 236, 169, 255, 235, 252}, - {208, 141, 168, 236, 170, 255, 235, 252}, - {209, 142, 169, 237, 171, 255, 236, 252}, - {209, 143, 169, 237, 171, 255, 236, 252}, - {210, 144, 169, 237, 172, 255, 236, 252}, - {211, 144, 169, 237, 172, 255, 236, 252}, - {212, 145, 170, 238, 173, 255, 237, 252}, - {213, 145, 170, 238, 173, 255, 237, 252}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {216, 148, 173, 240, 176, 255, 238, 253}, - {217, 148, 173, 240, 176, 255, 238, 253}, - {218, 149, 173, 241, 177, 255, 239, 253}, - {218, 149, 173, 241, 178, 255, 239, 253}, - {219, 150, 174, 241, 179, 255, 239, 253}, - {219, 151, 174, 241, 179, 255, 239, 253}, - {220, 152, 175, 242, 180, 255, 240, 253}, - {221, 152, 175, 242, 180, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {225, 156, 178, 244, 184, 255, 241, 253}, - {225, 157, 178, 244, 184, 255, 241, 253}, - {226, 158, 179, 244, 185, 255, 242, 253}, - {227, 158, 179, 244, 185, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {230, 161, 182, 246, 188, 255, 243, 253}, - {230, 162, 182, 246, 188, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {234, 166, 185, 247, 192, 255, 244, 253}, - {234, 167, 185, 247, 192, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {237, 171, 189, 249, 196, 255, 245, 254}, - {237, 172, 189, 249, 196, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {240, 175, 192, 249, 199, 255, 246, 254}, - {240, 176, 192, 249, 199, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {242, 179, 195, 250, 202, 255, 246, 254}, - {242, 180, 195, 250, 202, 255, 246, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {243, 182, 197, 251, 204, 255, 247, 254}, - {243, 183, 197, 251, 204, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {245, 186, 200, 251, 207, 255, 247, 254}, - {245, 187, 200, 251, 207, 255, 247, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 189, 202, 252, 208, 255, 248, 254}, - {246, 190, 202, 252, 208, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 192, 204, 252, 210, 255, 248, 254}, - {247, 193, 204, 252, 210, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 195, 206, 252, 212, 255, 249, 254}, - {248, 196, 206, 252, 212, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 198, 208, 253, 214, 255, 249, 254}, - {249, 199, 209, 253, 214, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 201, 211, 253, 215, 255, 249, 254}, - {250, 202, 211, 253, 215, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {251, 204, 213, 253, 217, 255, 250, 254}, - {251, 205, 213, 253, 217, 255, 250, 254}, - {251, 206, 214, 254, 218, 255, 250, 254}, - {251, 206, 215, 254, 218, 255, 250, 254}, - {252, 207, 216, 254, 219, 255, 250, 254}, - {252, 208, 216, 254, 219, 255, 250, 254}, - {252, 209, 217, 254, 220, 255, 250, 254}, - {252, 210, 217, 254, 220, 255, 250, 254}, - {252, 211, 218, 254, 221, 255, 250, 254}, - {252, 212, 218, 254, 221, 255, 250, 254}, - {253, 213, 219, 254, 222, 255, 250, 254}, - {253, 213, 220, 254, 222, 255, 250, 254}, - {253, 214, 221, 254, 223, 255, 250, 254}, - {253, 215, 221, 254, 223, 255, 250, 254}, - {253, 216, 222, 254, 224, 255, 251, 254}, - {253, 217, 223, 254, 224, 255, 251, 254}, - {253, 218, 224, 254, 225, 255, 251, 254}, - {253, 219, 224, 254, 225, 255, 251, 254}, - {254, 220, 225, 254, 225, 255, 251, 254}, - {254, 221, 226, 254, 225, 255, 251, 254}, - {254, 222, 227, 255, 226, 255, 251, 254}, - {254, 223, 227, 255, 226, 255, 251, 254}, - {254, 224, 228, 255, 227, 255, 251, 254}, - {254, 225, 229, 255, 227, 255, 251, 254}, - {254, 226, 230, 255, 228, 255, 251, 254}, - {254, 227, 230, 255, 229, 255, 251, 254}, - {255, 228, 231, 255, 230, 255, 251, 254}, - {255, 229, 232, 255, 230, 255, 251, 254}, - {255, 230, 233, 255, 231, 255, 252, 254}, - {255, 231, 234, 255, 231, 255, 252, 254}, - {255, 232, 235, 255, 232, 255, 252, 254}, - {255, 233, 236, 255, 232, 255, 252, 254}, - {255, 235, 237, 255, 233, 255, 252, 254}, - {255, 236, 238, 255, 234, 255, 252, 254}, - {255, 238, 240, 255, 235, 255, 252, 255}, - {255, 239, 241, 255, 235, 255, 252, 254}, - {255, 241, 243, 255, 236, 255, 252, 254}, - {255, 243, 245, 255, 237, 255, 252, 254}, - {255, 246, 247, 255, 239, 255, 253, 255}, -}; - -static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 } - }, { // Band 1 - { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 }, - { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 } - }, { // Band 2 - { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 }, - { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 } - }, { // Band 3 - { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 }, - { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 } - }, { // Band 4 - { 102, 148, 228 }, { 67, 117, 204 }, { 17, 82, 154 }, - { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 } - }, { // Band 5 - { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 }, - { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 } - } - }, { // Inter - { // Band 0 - { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 } - }, { // Band 1 - { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 }, - { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 } - }, { // Band 2 - { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 }, - { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 } - }, { // Band 3 - { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 }, - { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 } - }, { // Band 4 - { 114, 136, 232 }, { 51, 114, 207 }, { 11, 83, 155 }, - { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 } - }, { // Band 5 - { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 }, - { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 } - }, { // Band 1 - { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 }, - { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 } - }, { // Band 2 - { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 }, - { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 } - }, { // Band 3 - { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 }, - { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 } - }, { // Band 4 - { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 }, - { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 } - }, { // Band 5 - { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 }, - { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 } - } - }, { // Inter - { // Band 0 - { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 } - }, { // Band 1 - { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 }, - { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 } - }, { // Band 2 - { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 }, - { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 } - }, { // Band 3 - { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 }, - { 2, 73, 133 }, { 1, 44, 85 }, { 1, 22, 47 } - }, { // Band 4 - { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 }, - { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 } - }, { // Band 5 - { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 }, - { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 } - } - } - } -}; - -static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 125, 34, 187 }, { 52, 41, 133 }, { 6, 31, 56 } - }, { // Band 1 - { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 }, - { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 } - }, { // Band 2 - { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 }, - { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 } - }, { // Band 3 - { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 }, - { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 } - }, { // Band 4 - { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 }, - { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 } - }, { // Band 5 - { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 }, - { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 } - } - }, { // Inter - { // Band 0 - { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 } - }, { // Band 1 - { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 }, - { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 } - }, { // Band 2 - { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 }, - { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 } - }, { // Band 3 - { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 }, - { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 } - }, { // Band 4 - { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 }, - { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 } - }, { // Band 5 - { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 }, - { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 } - }, { // Band 1 - { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 }, - { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 } - }, { // Band 2 - { 71, 172, 217 }, { 44, 141, 209 }, { 15, 102, 173 }, - { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 } - }, { // Band 3 - { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 }, - { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 } - }, { // Band 4 - { 65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 }, - { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 } - }, { // Band 5 - { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 }, - { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 } - } - }, { // Inter - { // Band 0 - { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 } - }, { // Band 1 - { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 }, - { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 } - }, { // Band 2 - { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 }, - { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 } - }, { // Band 3 - { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 }, - { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 } - }, { // Band 4 - { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 }, - { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 } - }, { // Band 5 - { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 }, - { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 } - } - } - } -}; - -static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 } - }, { // Band 1 - { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 }, - { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 } - }, { // Band 2 - { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 }, - { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 } - }, { // Band 3 - { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 }, - { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 } - }, { // Band 4 - { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 }, - { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 } - }, { // Band 5 - { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 }, - { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 } - } - }, { // Inter - { // Band 0 - { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 } - }, { // Band 1 - { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 }, - { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 } - }, { // Band 2 - { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 }, - { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 28 } - }, { // Band 3 - { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 }, - { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 } - }, { // Band 4 - { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 }, - { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 } - }, { // Band 5 - { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 }, - { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 } - }, { // Band 1 - { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 }, - { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 } - }, { // Band 2 - { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 }, - { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 } - }, { // Band 3 - { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 }, - { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 } - }, { // Band 4 - { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 }, - { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 } - }, { // Band 5 - { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 }, - { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 } - } - }, { // Inter - { // Band 0 - { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 } - }, { // Band 1 - { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 }, - { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 } - }, { // Band 2 - { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 }, - { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 } - }, { // Band 3 - { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 }, - { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 } - }, { // Band 4 - { 40, 194, 227 }, { 8, 147, 204 }, { 1, 94, 155 }, - { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 } - }, { // Band 5 - { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 }, - { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 } - } - } - } -}; - -static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 } - }, { // Band 1 - { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 }, - { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 } - }, { // Band 2 - { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 }, - { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 } - }, { // Band 3 - { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 }, - { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 } - }, { // Band 4 - { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 }, - { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 } - }, { // Band 5 - { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 }, - { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 } - } - }, { // Inter - { // Band 0 - { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 } - }, { // Band 1 - { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 }, - { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 } - }, { // Band 2 - { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 }, - { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 } - }, { // Band 3 - { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 }, - { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 } - }, { // Band 4 - { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 }, - { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 } - }, { // Band 5 - { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 }, - { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 } - }, { // Band 1 - { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 }, - { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 } - }, { // Band 2 - { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 }, - { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 } - }, { // Band 3 - { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 }, - { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 } - }, { // Band 4 - { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 }, - { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 } - }, { // Band 5 - { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 }, - { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 } - } - }, { // Inter - { // Band 0 - { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 } - }, { // Band 1 - { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 }, - { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 } - }, { // Band 2 - { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 }, - { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 } - }, { // Band 3 - { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 }, - { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 } - }, { // Band 4 - { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 }, - { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 } - }, { // Band 5 - { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 }, - { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 } - } - } - } -}; - -static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { - assert(p != 0); - memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); -} - -void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { - if (full != model) - memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES); - extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); -} - -void vp9_default_coef_probs(VP9_COMMON *cm) { - vp9_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4); - vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8); - vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16); - vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32); -} - -#define COEF_COUNT_SAT 24 -#define COEF_MAX_UPDATE_FACTOR 112 -#define COEF_COUNT_SAT_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_KEY 112 -#define COEF_COUNT_SAT_AFTER_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 - -static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, - unsigned int count_sat, - unsigned int update_factor) { - const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; - vp9_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size]; - const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; - vp9_coeff_count_model *counts = cm->counts.coef[tx_size]; - unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = - cm->counts.eob_branch[tx_size]; - int i, j, k, l, m; - - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - const int n0 = counts[i][j][k][l][ZERO_TOKEN]; - const int n1 = counts[i][j][k][l][ONE_TOKEN]; - const int n2 = counts[i][j][k][l][TWO_TOKEN]; - const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; - const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { - { neob, eob_counts[i][j][k][l] - neob }, - { n0, n1 + n2 }, - { n1, n2 } - }; - for (m = 0; m < UNCONSTRAINED_NODES; ++m) - probs[i][j][k][l][m] = merge_probs(pre_probs[i][j][k][l][m], - branch_ct[m], - count_sat, update_factor); - } -} - -void vp9_adapt_coef_probs(VP9_COMMON *cm) { - TX_SIZE t; - unsigned int count_sat, update_factor; - - if (frame_is_intra_only(cm)) { - update_factor = COEF_MAX_UPDATE_FACTOR_KEY; - count_sat = COEF_COUNT_SAT_KEY; - } else if (cm->last_frame_type == KEY_FRAME) { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ - count_sat = COEF_COUNT_SAT_AFTER_KEY; - } else { - update_factor = COEF_MAX_UPDATE_FACTOR; - count_sat = COEF_COUNT_SAT; - } - for (t = TX_4X4; t <= TX_32X32; t++) - adapt_coef_probs(cm, t, count_sat, update_factor); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.h b/thirdparty/libvpx/vp9/common/vp9_entropy.h deleted file mode 100644 index 63b3bff5d9..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropy.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ENTROPY_H_ -#define VP9_COMMON_VP9_ENTROPY_H_ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/prob.h" - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define DIFF_UPDATE_PROB 252 - -// Coefficient token alphabet -#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 -#define ONE_TOKEN 1 // 1 Extra Bits 0+1 -#define TWO_TOKEN 2 // 2 Extra Bits 0+1 -#define THREE_TOKEN 3 // 3 Extra Bits 0+1 -#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 -#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 -#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 -#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 -#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 -#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 -#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 -#define EOB_TOKEN 11 // EOB Extra Bits 0+0 - -#define ENTROPY_TOKENS 12 - -#define ENTROPY_NODES 11 - -DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); - -#define CAT1_MIN_VAL 5 -#define CAT2_MIN_VAL 7 -#define CAT3_MIN_VAL 11 -#define CAT4_MIN_VAL 19 -#define CAT5_MIN_VAL 35 -#define CAT6_MIN_VAL 67 - -// Extra bit probabilities. -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob[1]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob[2]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob[3]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob[4]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob[5]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob[14]); - -#if CONFIG_VP9_HIGHBITDEPTH -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high10[1]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high10[2]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high10[3]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high10[4]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high10[5]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high10[16]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high12[1]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high12[2]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high12[3]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high12[4]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high12[5]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); -#endif // CONFIG_VP9_HIGHBITDEPTH - -#define EOB_MODEL_TOKEN 3 - -#define DCT_MAX_VALUE 16384 -#if CONFIG_VP9_HIGHBITDEPTH -#define DCT_MAX_VALUE_HIGH10 65536 -#define DCT_MAX_VALUE_HIGH12 262144 -#endif // CONFIG_VP9_HIGHBITDEPTH - -/* Coefficients are predicted via a 3-dimensional probability table. */ - -#define REF_TYPES 2 // intra=0, inter=1 - -/* Middle dimension reflects the coefficient position within the transform. */ -#define COEF_BANDS 6 - -/* Inside dimension is measure of nearby complexity, that reflects the energy - of nearby coefficients are nonzero. For the first coefficient (DC, unless - block type is 0), we look at the (already encoded) blocks above and to the - left of the current block. The context index is then the number (0,1,or 2) - of these blocks having nonzero coefficients. - After decoding a coefficient, the measure is determined by the size of the - most recently decoded coefficient. - Note that the intuitive meaning of this measure changes as coefficients - are decoded, e.g., prior to the first token, a zero means that my neighbors - are empty while, after the first token, because of the use of end-of-block, - a zero means we just decoded a zero and hence guarantees that a non-zero - coefficient will appear later in this block. However, this shift - in meaning is perfectly OK because our context depends also on the - coefficient band (and since zigzag positions 0, 1, and 2 are in - distinct bands). */ - -#define COEFF_CONTEXTS 6 -#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS) - -// #define ENTROPY_STATS - -typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] - [ENTROPY_TOKENS]; -typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] - [ENTROPY_NODES][2]; - -#define SUBEXP_PARAM 4 /* Subexponential code parameter */ -#define MODULUS_PARAM 13 /* Modulus parameter */ - -struct VP9Common; -void vp9_default_coef_probs(struct VP9Common *cm); -void vp9_adapt_coef_probs(struct VP9Common *cm); - -// This is the index in the scan order beyond which all coefficients for -// 8x8 transform and above are in the top band. -// This macro is currently unused but may be used by certain implementations -#define MAXBAND_INDEX 21 - -DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]); - -static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { - return tx_size == TX_4X4 ? vp9_coefband_trans_4x4 - : vp9_coefband_trans_8x8plus; -} - -// 128 lists of probabilities are stored for the following ONE node probs: -// 1, 3, 5, 7, ..., 253, 255 -// In between probabilities are interpolated linearly - -#define COEFF_PROB_MODELS 255 - -#define UNCONSTRAINED_NODES 3 - -#define PIVOT_NODE 2 // which node is pivot - -#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) -extern const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; -extern const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; - -typedef vpx_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS][UNCONSTRAINED_NODES]; - -typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS] - [UNCONSTRAINED_NODES + 1]; - -void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full); - -typedef char ENTROPY_CONTEXT; - -static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, - ENTROPY_CONTEXT b) { - return (a != 0) + (b != 0); -} - -static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, - const ENTROPY_CONTEXT *l) { - ENTROPY_CONTEXT above_ec = 0, left_ec = 0; - - switch (tx_size) { - case TX_4X4: - above_ec = a[0] != 0; - left_ec = l[0] != 0; - break; - case TX_8X8: - above_ec = !!*(const uint16_t *)a; - left_ec = !!*(const uint16_t *)l; - break; - case TX_16X16: - above_ec = !!*(const uint32_t *)a; - left_ec = !!*(const uint32_t *)l; - break; - case TX_32X32: - above_ec = !!*(const uint64_t *)a; - left_ec = !!*(const uint64_t *)l; - break; - default: - assert(0 && "Invalid transform size."); - break; - } - - return combine_entropy_contexts(above_ec, left_ec); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.c b/thirdparty/libvpx/vp9/common/vp9_entropymode.c deleted file mode 100644 index 670348bafd..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymode.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_seg_common.h" - -const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { - { // above = dc - { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc - { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v - { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h - { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 - { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 - { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 - { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 - { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 - { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 - { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm - }, { // above = v - { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc - { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v - { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h - { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 - { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 - { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 - { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 - { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 - { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 - { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm - }, { // above = h - { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc - { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v - { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h - { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 - { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 - { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 - { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 - { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 - { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 - { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm - }, { // above = d45 - { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc - { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v - { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h - { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 - { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 - { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 - { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 - { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 - { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 - { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm - }, { // above = d135 - { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc - { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v - { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h - { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 - { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 - { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 - { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 - { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 - { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 - { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm - }, { // above = d117 - { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc - { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v - { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h - { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 - { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 - { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 - { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 - { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 - { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 - { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm - }, { // above = d153 - { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc - { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v - { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h - { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 - { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 - { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 - { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 - { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 - { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 - { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm - }, { // above = d207 - { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc - { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v - { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h - { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 - { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 - { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 - { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 - { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 - { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 - { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm - }, { // above = d63 - { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc - { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v - { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h - { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 - { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 - { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 - { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 - { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207 - { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 - { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm - }, { // above = tm - { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc - { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v - { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h - { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 - { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 - { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 - { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 - { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 - { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 - { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm - } -}; - -const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { - { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc - { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v - { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h - { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 - { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135 - { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 - { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 - { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 - { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 - { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm -}; - -static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 - { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 - { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 -}; - -static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { - { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc - { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v - { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h - { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 - { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 - { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 - { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 - { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 - { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 - { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm -}; - -const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 158, 97, 94 }, // a/l both not split - { 93, 24, 99 }, // a split, l not split - { 85, 119, 44 }, // l split, a not split - { 62, 59, 67 }, // a/l both split - // 16x16 -> 8x8 - { 149, 53, 53 }, // a/l both not split - { 94, 20, 48 }, // a split, l not split - { 83, 53, 24 }, // l split, a not split - { 52, 18, 18 }, // a/l both split - // 32x32 -> 16x16 - { 150, 40, 39 }, // a/l both not split - { 78, 12, 26 }, // a split, l not split - { 67, 33, 11 }, // l split, a not split - { 24, 7, 5 }, // a/l both split - // 64x64 -> 32x32 - { 174, 35, 49 }, // a/l both not split - { 68, 11, 27 }, // a split, l not split - { 57, 15, 9 }, // l split, a not split - { 12, 3, 3 }, // a/l both split -}; - -static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 199, 122, 141 }, // a/l both not split - { 147, 63, 159 }, // a split, l not split - { 148, 133, 118 }, // l split, a not split - { 121, 104, 114 }, // a/l both split - // 16x16 -> 8x8 - { 174, 73, 87 }, // a/l both not split - { 92, 41, 83 }, // a split, l not split - { 82, 99, 50 }, // l split, a not split - { 53, 39, 39 }, // a/l both split - // 32x32 -> 16x16 - { 177, 58, 59 }, // a/l both not split - { 68, 26, 63 }, // a split, l not split - { 52, 79, 25 }, // l split, a not split - { 17, 14, 12 }, // a/l both split - // 64x64 -> 32x32 - { 222, 34, 30 }, // a/l both not split - { 72, 16, 44 }, // a split, l not split - { 58, 32, 12 }, // l split, a not split - { 10, 7, 6 }, // a/l both split -}; - -static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] - [INTER_MODES - 1] = { - {2, 173, 34}, // 0 = both zero mv - {7, 145, 85}, // 1 = one zero mv + one a predicted mv - {7, 166, 63}, // 2 = two predicted mvs - {7, 94, 66}, // 3 = one predicted/zero and one new mv - {8, 64, 46}, // 4 = two new mvs - {17, 81, 31}, // 5 = one intra neighbour + x - {25, 29, 30}, // 6 = two intra neighbours -}; - -/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ -const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { - -DC_PRED, 2, /* 0 = DC_NODE */ - -TM_PRED, 4, /* 1 = TM_NODE */ - -V_PRED, 6, /* 2 = V_NODE */ - 8, 12, /* 3 = COM_NODE */ - -H_PRED, 10, /* 4 = H_NODE */ - -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ - -D45_PRED, 14, /* 6 = D45_NODE */ - -D63_PRED, 16, /* 7 = D63_NODE */ - -D153_PRED, -D207_PRED /* 8 = D153_NODE */ -}; - -const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { - -INTER_OFFSET(ZEROMV), 2, - -INTER_OFFSET(NEARESTMV), 4, - -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) -}; - -const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { - -PARTITION_NONE, 2, - -PARTITION_HORZ, 4, - -PARTITION_VERT, -PARTITION_SPLIT -}; - -static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { - 9, 102, 187, 225 -}; - -static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 -}; - -static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { - 50, 126, 123, 221, 226 -}; - -static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { - { 33, 16 }, - { 77, 74 }, - { 142, 142 }, - { 172, 170 }, - { 238, 247 } -}; - -static const struct tx_probs default_tx_probs = { - { { 3, 136, 37 }, - { 5, 52, 13 } }, - - { { 20, 152 }, - { 15, 101 } }, - - { { 100 }, - { 66 } } -}; - -void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, - unsigned int (*ct_32x32p)[2]) { - ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; - ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + - tx_count_32x32p[TX_16X16] + - tx_count_32x32p[TX_32X32]; - ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; - ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + - tx_count_32x32p[TX_32X32]; - ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; - ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; -} - -void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, - unsigned int (*ct_16x16p)[2]) { - ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; - ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; - ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; - ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; -} - -void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, - unsigned int (*ct_8x8p)[2]) { - ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; - ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; -} - -static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { - 192, 128, 64 -}; - -static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, -}; - -static void init_mode_probs(FRAME_CONTEXT *fc) { - vp9_copy(fc->uv_mode_prob, default_if_uv_probs); - vp9_copy(fc->y_mode_prob, default_if_y_probs); - vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); - vp9_copy(fc->partition_prob, default_partition_probs); - vp9_copy(fc->intra_inter_prob, default_intra_inter_p); - vp9_copy(fc->comp_inter_prob, default_comp_inter_p); - vp9_copy(fc->comp_ref_prob, default_comp_ref_p); - vp9_copy(fc->single_ref_prob, default_single_ref_p); - fc->tx_probs = default_tx_probs; - vp9_copy(fc->skip_probs, default_skip_probs); - vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); -} - -const vpx_tree_index vp9_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)] = { - -EIGHTTAP, 2, - -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP -}; - -void vp9_adapt_mode_probs(VP9_COMMON *cm) { - int i, j; - FRAME_CONTEXT *fc = cm->fc; - const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; - const FRAME_COUNTS *counts = &cm->counts; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i], - counts->intra_inter[i]); - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], - counts->comp_inter[i]); - for (i = 0; i < REF_CONTEXTS; i++) - fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], - counts->comp_ref[i]); - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - fc->single_ref_prob[i][j] = mode_mv_merge_probs( - pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) - vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], - counts->inter_mode[i], fc->inter_mode_probs[i]); - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], - counts->y_mode[i], fc->y_mode_prob[i]); - - for (i = 0; i < INTRA_MODES; ++i) - vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], - counts->uv_mode[i], fc->uv_mode_prob[i]); - - for (i = 0; i < PARTITION_CONTEXTS; i++) - vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i], - counts->partition[i], fc->partition_prob[i]); - - if (cm->interp_filter == SWITCHABLE) { - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - vpx_tree_merge_probs(vp9_switchable_interp_tree, - pre_fc->switchable_interp_prob[i], - counts->switchable_interp[i], - fc->switchable_interp_prob[i]); - } - - if (cm->tx_mode == TX_MODE_SELECT) { - int j; - unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; - unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; - unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { - tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZES - 3; ++j) - fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); - - tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZES - 2; ++j) - fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); - - tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZES - 1; ++j) - fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); - } - } - - for (i = 0; i < SKIP_CONTEXTS; ++i) - fc->skip_probs[i] = mode_mv_merge_probs( - pre_fc->skip_probs[i], counts->skip[i]); -} - -static void set_default_lf_deltas(struct loopfilter *lf) { - lf->mode_ref_delta_enabled = 1; - lf->mode_ref_delta_update = 1; - - lf->ref_deltas[INTRA_FRAME] = 1; - lf->ref_deltas[LAST_FRAME] = 0; - lf->ref_deltas[GOLDEN_FRAME] = -1; - lf->ref_deltas[ALTREF_FRAME] = -1; - - lf->mode_deltas[0] = 0; - lf->mode_deltas[1] = 0; -} - -void vp9_setup_past_independence(VP9_COMMON *cm) { - // Reset the segment feature data to the default stats: - // Features disabled, 0, with delta coding (Default state). - struct loopfilter *const lf = &cm->lf; - - int i; - vp9_clearall_segfeatures(&cm->seg); - cm->seg.abs_delta = SEGMENT_DELTADATA; - - if (cm->last_frame_seg_map && !cm->frame_parallel_decode) - memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); - - if (cm->current_frame_seg_map) - memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); - - // Reset the mode ref deltas for loop filter - vp9_zero(lf->last_ref_deltas); - vp9_zero(lf->last_mode_deltas); - set_default_lf_deltas(lf); - - // To force update of the sharpness - lf->last_sharpness_level = -1; - - vp9_default_coef_probs(cm); - init_mode_probs(cm->fc); - vp9_init_mv_probs(cm); - cm->fc->initialized = 1; - - if (cm->frame_type == KEY_FRAME || - cm->error_resilient_mode || cm->reset_frame_context == 3) { - // Reset all frame contexts. - for (i = 0; i < FRAME_CONTEXTS; ++i) - cm->frame_contexts[i] = *cm->fc; - } else if (cm->reset_frame_context == 2) { - // Reset only the frame context specified in the frame header. - cm->frame_contexts[cm->frame_context_idx] = *cm->fc; - } - - // prev_mip will only be allocated in encoder. - if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode) - memset(cm->prev_mip, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); - - vp9_zero(cm->ref_frame_sign_bias); - - cm->frame_context_idx = 0; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.h b/thirdparty/libvpx/vp9/common/vp9_entropymode.h deleted file mode 100644 index 0285be1557..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymode.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ -#define VP9_COMMON_VP9_ENTROPYMODE_H_ - -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_filter.h" -#include "vpx_dsp/vpx_filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define BLOCK_SIZE_GROUPS 4 - -#define TX_SIZE_CONTEXTS 2 - -#define INTER_OFFSET(mode) ((mode) - NEARESTMV) - -struct VP9Common; - -struct tx_probs { - vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; - vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; - vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; -}; - -struct tx_counts { - unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; - unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; - unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; - unsigned int tx_totals[TX_SIZES]; -}; - -typedef struct frame_contexts { - vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; - vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; - vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; - vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; - vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1]; - vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; - vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; - vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vpx_prob single_ref_prob[REF_CONTEXTS][2]; - vpx_prob comp_ref_prob[REF_CONTEXTS]; - struct tx_probs tx_probs; - vpx_prob skip_probs[SKIP_CONTEXTS]; - nmv_context nmvc; - int initialized; -} FRAME_CONTEXT; - -typedef struct FRAME_COUNTS { - unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; - unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; - unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; - vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; - unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] - [COEF_BANDS][COEFF_CONTEXTS]; - unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS]; - unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; - unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; - unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][2][2]; - unsigned int comp_ref[REF_CONTEXTS][2]; - struct tx_counts tx; - unsigned int skip[SKIP_CONTEXTS][2]; - nmv_context_counts mv; -} FRAME_COUNTS; - -extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; -extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] - [INTRA_MODES - 1]; -extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1]; -extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; -extern const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)]; -extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)]; -extern const vpx_tree_index vp9_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)]; - -void vp9_setup_past_independence(struct VP9Common *cm); - -void vp9_adapt_mode_probs(struct VP9Common *cm); - -void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, - unsigned int (*ct_32x32p)[2]); -void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, - unsigned int (*ct_16x16p)[2]); -void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, - unsigned int (*ct_8x8p)[2]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.c b/thirdparty/libvpx/vp9/common/vp9_entropymv.c deleted file mode 100644 index 566ae91cf7..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymv.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_entropymv.h" - -const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { - -MV_JOINT_ZERO, 2, - -MV_JOINT_HNZVZ, 4, - -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ -}; - -const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { - -MV_CLASS_0, 2, - -MV_CLASS_1, 4, - 6, 8, - -MV_CLASS_2, -MV_CLASS_3, - 10, 12, - -MV_CLASS_4, -MV_CLASS_5, - -MV_CLASS_6, 14, - 16, 18, - -MV_CLASS_7, -MV_CLASS_8, - -MV_CLASS_9, -MV_CLASS_10, -}; - -const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { - -0, -1, -}; - -const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { - -0, 2, - -1, 4, - -2, -3 -}; - -static const nmv_context default_nmv_context = { - {32, 64, 96}, - { - { // Vertical component - 128, // sign - {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class - {216}, // class0 - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits - {{128, 128, 64}, {96, 112, 64}}, // class0_fp - {64, 96, 64}, // fp - 160, // class0_hp bit - 128, // hp - }, - { // Horizontal component - 128, // sign - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class - {208}, // class0 - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits - {{128, 128, 64}, {96, 112, 64}}, // class0_fp - {64, 96, 64}, // fp - 160, // class0_hp bit - 128, // hp - } - }, -}; - -static const uint8_t log_in_base_2[] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 -}; - -static INLINE int mv_class_base(MV_CLASS_TYPE c) { - return c ? CLASS0_SIZE << (c + 2) : 0; -} - -MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { - const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? - MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; - if (offset) - *offset = z - mv_class_base(c); - return c; -} - -static void inc_mv_component(int v, nmv_component_counts *comp_counts, - int incr, int usehp) { - int s, z, c, o, d, e, f; - assert(v != 0); /* should not be zero */ - s = v < 0; - comp_counts->sign[s] += incr; - z = (s ? -v : v) - 1; /* magnitude - 1 */ - - c = vp9_get_mv_class(z, &o); - comp_counts->classes[c] += incr; - - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - - if (c == MV_CLASS_0) { - comp_counts->class0[d] += incr; - comp_counts->class0_fp[d][f] += incr; - comp_counts->class0_hp[e] += usehp * incr; - } else { - int i; - int b = c + CLASS0_BITS - 1; // number of bits - for (i = 0; i < b; ++i) - comp_counts->bits[i][((d >> i) & 1)] += incr; - comp_counts->fp[f] += incr; - comp_counts->hp[e] += usehp * incr; - } -} - -void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { - if (counts != NULL) { - const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); - ++counts->joints[j]; - - if (mv_joint_vertical(j)) { - inc_mv_component(mv->row, &counts->comps[0], 1, 1); - } - - if (mv_joint_horizontal(j)) { - inc_mv_component(mv->col, &counts->comps[1], 1, 1); - } - } -} - -void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { - int i, j; - - nmv_context *fc = &cm->fc->nmvc; - const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc; - const nmv_context_counts *counts = &cm->counts.mv; - - vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, - fc->joints); - - for (i = 0; i < 2; ++i) { - nmv_component *comp = &fc->comps[i]; - const nmv_component *pre_comp = &pre_fc->comps[i]; - const nmv_component_counts *c = &counts->comps[i]; - - comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign); - vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, - comp->classes); - vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, - comp->class0); - - for (j = 0; j < MV_OFFSET_BITS; ++j) - comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]); - - for (j = 0; j < CLASS0_SIZE; ++j) - vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], - c->class0_fp[j], comp->class0_fp[j]); - - vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); - - if (allow_hp) { - comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); - comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp); - } - } -} - -void vp9_init_mv_probs(VP9_COMMON *cm) { - cm->fc->nmvc = default_nmv_context; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.h b/thirdparty/libvpx/vp9/common/vp9_entropymv.h deleted file mode 100644 index 2f05ad44b6..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymv.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_VP9_ENTROPYMV_H_ -#define VP9_COMMON_VP9_ENTROPYMV_H_ - -#include "./vpx_config.h" - -#include "vpx_dsp/prob.h" - -#include "vp9/common/vp9_mv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; - -void vp9_init_mv_probs(struct VP9Common *cm); - -void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); - -// Integer pel reference mv threshold for use of high-precision 1/8 mv -#define COMPANDED_MVREF_THRESH 8 - -static INLINE int use_mv_hp(const MV *ref) { - return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && - (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; -} - -#define MV_UPDATE_PROB 252 - -/* Symbols for coding which components are zero jointly */ -#define MV_JOINTS 4 -typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ -} MV_JOINT_TYPE; - -static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { - return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; -} - -static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { - return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; -} - -/* Symbols for coding magnitude class of nonzero components */ -#define MV_CLASSES 11 -typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - MV_CLASS_10 = 10, /* (1024,2048] integer pel */ -} MV_CLASS_TYPE; - -#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ -#define CLASS0_SIZE (1 << CLASS0_BITS) -#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) -#define MV_FP_SIZE 4 - -#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) -#define MV_MAX ((1 << MV_MAX_BITS) - 1) -#define MV_VALS ((MV_MAX << 1) + 1) - -#define MV_IN_USE_BITS 14 -#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) -#define MV_LOW (-(1 << MV_IN_USE_BITS)) - -extern const vpx_tree_index vp9_mv_joint_tree[]; -extern const vpx_tree_index vp9_mv_class_tree[]; -extern const vpx_tree_index vp9_mv_class0_tree[]; -extern const vpx_tree_index vp9_mv_fp_tree[]; - -typedef struct { - vpx_prob sign; - vpx_prob classes[MV_CLASSES - 1]; - vpx_prob class0[CLASS0_SIZE - 1]; - vpx_prob bits[MV_OFFSET_BITS]; - vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1]; - vpx_prob fp[MV_FP_SIZE - 1]; - vpx_prob class0_hp; - vpx_prob hp; -} nmv_component; - -typedef struct { - vpx_prob joints[MV_JOINTS - 1]; - nmv_component comps[2]; -} nmv_context; - -static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { - if (mv->row == 0) { - return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; - } else { - return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; - } -} - -MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); - -typedef struct { - unsigned int sign[2]; - unsigned int classes[MV_CLASSES]; - unsigned int class0[CLASS0_SIZE]; - unsigned int bits[MV_OFFSET_BITS][2]; - unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE]; - unsigned int fp[MV_FP_SIZE]; - unsigned int class0_hp[2]; - unsigned int hp[2]; -} nmv_component_counts; - -typedef struct { - unsigned int joints[MV_JOINTS]; - nmv_component_counts comps[2]; -} nmv_context_counts; - -void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_enums.h b/thirdparty/libvpx/vp9/common/vp9_enums.h deleted file mode 100644 index d089f23f97..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_enums.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ENUMS_H_ -#define VP9_COMMON_VP9_ENUMS_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MI_SIZE_LOG2 3 -#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 - -#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit -#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block - -#define MI_MASK (MI_BLOCK_SIZE - 1) - -// Bitstream profiles indicated by 2-3 bits in the uncompressed header. -// 00: Profile 0. 8-bit 4:2:0 only. -// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. -// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling. -// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0 -// sampling. -// 111: Undefined profile. -typedef enum BITSTREAM_PROFILE { - PROFILE_0, - PROFILE_1, - PROFILE_2, - PROFILE_3, - MAX_PROFILES -} BITSTREAM_PROFILE; - -#define BLOCK_4X4 0 -#define BLOCK_4X8 1 -#define BLOCK_8X4 2 -#define BLOCK_8X8 3 -#define BLOCK_8X16 4 -#define BLOCK_16X8 5 -#define BLOCK_16X16 6 -#define BLOCK_16X32 7 -#define BLOCK_32X16 8 -#define BLOCK_32X32 9 -#define BLOCK_32X64 10 -#define BLOCK_64X32 11 -#define BLOCK_64X64 12 -#define BLOCK_SIZES 13 -#define BLOCK_INVALID BLOCK_SIZES -typedef uint8_t BLOCK_SIZE; - -typedef enum PARTITION_TYPE { - PARTITION_NONE, - PARTITION_HORZ, - PARTITION_VERT, - PARTITION_SPLIT, - PARTITION_TYPES, - PARTITION_INVALID = PARTITION_TYPES -} PARTITION_TYPE; - -typedef char PARTITION_CONTEXT; -#define PARTITION_PLOFFSET 4 // number of probability models per block size -#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) - -// block transform size -typedef uint8_t TX_SIZE; -#define TX_4X4 ((TX_SIZE)0) // 4x4 transform -#define TX_8X8 ((TX_SIZE)1) // 8x8 transform -#define TX_16X16 ((TX_SIZE)2) // 16x16 transform -#define TX_32X32 ((TX_SIZE)3) // 32x32 transform -#define TX_SIZES ((TX_SIZE)4) - -// frame transform mode -typedef enum { - ONLY_4X4 = 0, // only 4x4 transform used - ALLOW_8X8 = 1, // allow block transform size up to 8x8 - ALLOW_16X16 = 2, // allow block transform size up to 16x16 - ALLOW_32X32 = 3, // allow block transform size up to 32x32 - TX_MODE_SELECT = 4, // transform specified for each block - TX_MODES = 5, -} TX_MODE; - -typedef enum { - DCT_DCT = 0, // DCT in both horizontal and vertical - ADST_DCT = 1, // ADST in vertical, DCT in horizontal - DCT_ADST = 2, // DCT in vertical, ADST in horizontal - ADST_ADST = 3, // ADST in both directions - TX_TYPES = 4 -} TX_TYPE; - -typedef enum { - VP9_LAST_FLAG = 1 << 0, - VP9_GOLD_FLAG = 1 << 1, - VP9_ALT_FLAG = 1 << 2, -} VP9_REFFRAME; - -typedef enum { - PLANE_TYPE_Y = 0, - PLANE_TYPE_UV = 1, - PLANE_TYPES -} PLANE_TYPE; - -#define DC_PRED 0 // Average of above and left pixels -#define V_PRED 1 // Vertical -#define H_PRED 2 // Horizontal -#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) -#define D135_PRED 4 // Directional 135 deg = 180 - 45 -#define D117_PRED 5 // Directional 117 deg = 180 - 63 -#define D153_PRED 6 // Directional 153 deg = 180 - 27 -#define D207_PRED 7 // Directional 207 deg = 180 + 27 -#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) -#define TM_PRED 9 // True-motion -#define NEARESTMV 10 -#define NEARMV 11 -#define ZEROMV 12 -#define NEWMV 13 -#define MB_MODE_COUNT 14 -typedef uint8_t PREDICTION_MODE; - -#define INTRA_MODES (TM_PRED + 1) - -#define INTER_MODES (1 + NEWMV - NEARESTMV) - -#define SKIP_CONTEXTS 3 -#define INTER_MODE_CONTEXTS 7 - -/* Segment Feature Masks */ -#define MAX_MV_REF_CANDIDATES 2 - -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.c b/thirdparty/libvpx/vp9/common/vp9_filter.c deleted file mode 100644 index 4b2198fc40..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_filter.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_filter.h" - -DECLARE_ALIGNED(256, static const InterpKernel, - bilinear_filters[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, - { 0, 0, 0, 120, 8, 0, 0, 0 }, - { 0, 0, 0, 112, 16, 0, 0, 0 }, - { 0, 0, 0, 104, 24, 0, 0, 0 }, - { 0, 0, 0, 96, 32, 0, 0, 0 }, - { 0, 0, 0, 88, 40, 0, 0, 0 }, - { 0, 0, 0, 80, 48, 0, 0, 0 }, - { 0, 0, 0, 72, 56, 0, 0, 0 }, - { 0, 0, 0, 64, 64, 0, 0, 0 }, - { 0, 0, 0, 56, 72, 0, 0, 0 }, - { 0, 0, 0, 48, 80, 0, 0, 0 }, - { 0, 0, 0, 40, 88, 0, 0, 0 }, - { 0, 0, 0, 32, 96, 0, 0, 0 }, - { 0, 0, 0, 24, 104, 0, 0, 0 }, - { 0, 0, 0, 16, 112, 0, 0, 0 }, - { 0, 0, 0, 8, 120, 0, 0, 0 } -}; - -// Lagrangian interpolation filter -DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0}, - { 0, 1, -5, 126, 8, -3, 1, 0}, - { -1, 3, -10, 122, 18, -6, 2, 0}, - { -1, 4, -13, 118, 27, -9, 3, -1}, - { -1, 4, -16, 112, 37, -11, 4, -1}, - { -1, 5, -18, 105, 48, -14, 4, -1}, - { -1, 5, -19, 97, 58, -16, 5, -1}, - { -1, 6, -19, 88, 68, -18, 5, -1}, - { -1, 6, -19, 78, 78, -19, 6, -1}, - { -1, 5, -18, 68, 88, -19, 6, -1}, - { -1, 5, -16, 58, 97, -19, 5, -1}, - { -1, 4, -14, 48, 105, -18, 5, -1}, - { -1, 4, -11, 37, 112, -16, 4, -1}, - { -1, 3, -9, 27, 118, -13, 4, -1}, - { 0, 2, -6, 18, 122, -10, 3, -1}, - { 0, 1, -3, 8, 126, -5, 1, 0} -}; - -// DCT based filter -DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8s[SUBPEL_SHIFTS]) = { - {0, 0, 0, 128, 0, 0, 0, 0}, - {-1, 3, -7, 127, 8, -3, 1, 0}, - {-2, 5, -13, 125, 17, -6, 3, -1}, - {-3, 7, -17, 121, 27, -10, 5, -2}, - {-4, 9, -20, 115, 37, -13, 6, -2}, - {-4, 10, -23, 108, 48, -16, 8, -3}, - {-4, 10, -24, 100, 59, -19, 9, -3}, - {-4, 11, -24, 90, 70, -21, 10, -4}, - {-4, 11, -23, 80, 80, -23, 11, -4}, - {-4, 10, -21, 70, 90, -24, 11, -4}, - {-3, 9, -19, 59, 100, -24, 10, -4}, - {-3, 8, -16, 48, 108, -23, 10, -4}, - {-2, 6, -13, 37, 115, -20, 9, -4}, - {-2, 5, -10, 27, 121, -17, 7, -3}, - {-1, 3, -6, 17, 125, -13, 5, -2}, - {0, 1, -3, 8, 127, -7, 3, -1} -}; - -// freqmultiplier = 0.5 -DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0}, - {-3, -1, 32, 64, 38, 1, -3, 0}, - {-2, -2, 29, 63, 41, 2, -3, 0}, - {-2, -2, 26, 63, 43, 4, -4, 0}, - {-2, -3, 24, 62, 46, 5, -4, 0}, - {-2, -3, 21, 60, 49, 7, -4, 0}, - {-1, -4, 18, 59, 51, 9, -4, 0}, - {-1, -4, 16, 57, 53, 12, -4, -1}, - {-1, -4, 14, 55, 55, 14, -4, -1}, - {-1, -4, 12, 53, 57, 16, -4, -1}, - { 0, -4, 9, 51, 59, 18, -4, -1}, - { 0, -4, 7, 49, 60, 21, -3, -2}, - { 0, -4, 5, 46, 62, 24, -3, -2}, - { 0, -4, 4, 43, 63, 26, -2, -2}, - { 0, -3, 2, 41, 63, 29, -2, -2}, - { 0, -3, 1, 38, 64, 32, -1, -3} -}; - - -const InterpKernel *vp9_filter_kernels[4] = { - sub_pel_filters_8, - sub_pel_filters_8lp, - sub_pel_filters_8s, - bilinear_filters -}; diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.h b/thirdparty/libvpx/vp9/common/vp9_filter.h deleted file mode 100644 index efa24bc67b..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_filter.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_FILTER_H_ -#define VP9_COMMON_VP9_FILTER_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_filter.h" -#include "vpx_ports/mem.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -#define EIGHTTAP 0 -#define EIGHTTAP_SMOOTH 1 -#define EIGHTTAP_SHARP 2 -#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ -#define BILINEAR 3 -// The codec can operate in four possible inter prediction filter mode: -// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. -#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) -#define SWITCHABLE 4 /* should be the last one */ - -typedef uint8_t INTERP_FILTER; - -extern const InterpKernel *vp9_filter_kernels[4]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_FILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c deleted file mode 100644 index 0f41d66985..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_frame_buffers.h" -#include "vpx_mem/vpx_mem.h" - -int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { - assert(list != NULL); - vp9_free_internal_frame_buffers(list); - - list->num_internal_frame_buffers = - VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; - list->int_fb = - (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, - sizeof(*list->int_fb)); - return (list->int_fb == NULL); -} - -void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) { - int i; - - assert(list != NULL); - - for (i = 0; i < list->num_internal_frame_buffers; ++i) { - vpx_free(list->int_fb[i].data); - list->int_fb[i].data = NULL; - } - vpx_free(list->int_fb); - list->int_fb = NULL; -} - -int vp9_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb) { - int i; - InternalFrameBufferList *const int_fb_list = - (InternalFrameBufferList *)cb_priv; - if (int_fb_list == NULL) - return -1; - - // Find a free frame buffer. - for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { - if (!int_fb_list->int_fb[i].in_use) - break; - } - - if (i == int_fb_list->num_internal_frame_buffers) - return -1; - - if (int_fb_list->int_fb[i].size < min_size) { - int_fb_list->int_fb[i].data = - (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size); - if (!int_fb_list->int_fb[i].data) - return -1; - - // This memset is needed for fixing valgrind error from C loop filter - // due to access uninitialized memory in frame border. It could be - // removed if border is totally removed. - memset(int_fb_list->int_fb[i].data, 0, min_size); - int_fb_list->int_fb[i].size = min_size; - } - - fb->data = int_fb_list->int_fb[i].data; - fb->size = int_fb_list->int_fb[i].size; - int_fb_list->int_fb[i].in_use = 1; - - // Set the frame buffer's private data to point at the internal frame buffer. - fb->priv = &int_fb_list->int_fb[i]; - return 0; -} - -int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { - InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; - (void)cb_priv; - if (int_fb) - int_fb->in_use = 0; - return 0; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h deleted file mode 100644 index e2cfe61b66..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_ -#define VP9_COMMON_VP9_FRAME_BUFFERS_H_ - -#include "vpx/vpx_frame_buffer.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct InternalFrameBuffer { - uint8_t *data; - size_t size; - int in_use; -} InternalFrameBuffer; - -typedef struct InternalFrameBufferList { - int num_internal_frame_buffers; - InternalFrameBuffer *int_fb; -} InternalFrameBufferList; - -// Initializes |list|. Returns 0 on success. -int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); - -// Free any data allocated to the frame buffers. -void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); - -// Callback used by libvpx to request an external frame buffer. |cb_priv| -// Callback private data, which points to an InternalFrameBufferList. -// |min_size| is the minimum size in bytes needed to decode the next frame. -// |fb| pointer to the frame buffer. -int vp9_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb); - -// Callback used by libvpx when there are no references to the frame buffer. -// |cb_priv| is not used. |fb| pointer to the frame buffer. -int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.c b/thirdparty/libvpx/vp9/common/vp9_idct.c deleted file mode 100644 index 1b420143bb..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_idct.c +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vp9_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_idct.h" -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - const transform_2d IHT_4[] = { - { idct4_c, idct4_c }, // DCT_DCT = 0 - { iadst4_c, idct4_c }, // ADST_DCT = 1 - { idct4_c, iadst4_c }, // DCT_ADST = 2 - { iadst4_c, iadst4_c } // ADST_ADST = 3 - }; - - int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; - - // inverse transform row vectors - for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr); - input += 4; - outptr += 4; - } - - // inverse transform column vectors - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 4)); - } - } -} - -static const transform_2d IHT_8[] = { - { idct8_c, idct8_c }, // DCT_DCT = 0 - { iadst8_c, idct8_c }, // ADST_DCT = 1 - { idct8_c, iadst8_c }, // DCT_ADST = 2 - { iadst8_c, iadst8_c } // ADST_ADST = 3 -}; - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - int i, j; - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - tran_low_t temp_in[8], temp_out[8]; - const transform_2d ht = IHT_8[tx_type]; - - // inverse transform row vectors - for (i = 0; i < 8; ++i) { - ht.rows(input, outptr); - input += 8; - outptr += 8; - } - - // inverse transform column vectors - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - ht.cols(temp_in, temp_out); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); - } - } -} - -static const transform_2d IHT_16[] = { - { idct16_c, idct16_c }, // DCT_DCT = 0 - { iadst16_c, idct16_c }, // ADST_DCT = 1 - { idct16_c, iadst16_c }, // DCT_ADST = 2 - { iadst16_c, iadst16_c } // ADST_ADST = 3 -}; - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - int i, j; - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - tran_low_t temp_in[16], temp_out[16]; - const transform_2d ht = IHT_16[tx_type]; - - // Rows - for (i = 0; i < 16; ++i) { - ht.rows(input, outptr); - input += 16; - outptr += 16; - } - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - ht.cols(temp_in, temp_out); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -// idct -void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) - vpx_idct4x4_16_add(input, dest, stride); - else - vpx_idct4x4_1_add(input, dest, stride); -} - - -void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) - vpx_iwht4x4_16_add(input, dest, stride); - else - vpx_iwht4x4_1_add(input, dest, stride); -} - -void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - if (eob == 1) - // DC only DCT coefficient - vpx_idct8x8_1_add(input, dest, stride); - else if (eob <= 12) - vpx_idct8x8_12_add(input, dest, stride); - else - vpx_idct8x8_64_add(input, dest, stride); -} - -void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to separate different cases. */ - if (eob == 1) - /* DC only DCT coefficient. */ - vpx_idct16x16_1_add(input, dest, stride); - else if (eob <= 10) - vpx_idct16x16_10_add(input, dest, stride); - else - vpx_idct16x16_256_add(input, dest, stride); -} - -void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - if (eob == 1) - vpx_idct32x32_1_add(input, dest, stride); - else if (eob <= 34) - // non-zero coeff only in upper-left 8x8 - vpx_idct32x32_34_add(input, dest, stride); - else if (eob <= 135) - // non-zero coeff only in upper-left 16x16 - vpx_idct32x32_135_add(input, dest, stride); - else - vpx_idct32x32_1024_add(input, dest, stride); -} - -// iht -void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) - vp9_idct4x4_add(input, dest, stride, eob); - else - vp9_iht4x4_16_add(input, dest, stride, tx_type); -} - -void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct8x8_add(input, dest, stride, eob); - } else { - vp9_iht8x8_64_add(input, dest, stride, tx_type); - } -} - -void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct16x16_add(input, dest, stride, eob); - } else { - vp9_iht16x16_256_add(input, dest, stride, tx_type); - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { - const highbd_transform_2d IHT_4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 - }; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; - - // Inverse transform row vectors. - for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr, bd); - input += 4; - outptr += 4; - } - - // Inverse transform column vectors. - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out, bd); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); - } - } -} - -static const highbd_transform_2d HIGH_IHT_8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 -}; - -void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { - int i, j; - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - tran_low_t temp_in[8], temp_out[8]; - const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Inverse transform row vectors. - for (i = 0; i < 8; ++i) { - ht.rows(input, outptr, bd); - input += 8; - outptr += 8; - } - - // Inverse transform column vectors. - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - ht.cols(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } -} - -static const highbd_transform_2d HIGH_IHT_16[] = { - { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 -}; - -void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { - int i, j; - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - tran_low_t temp_in[16], temp_out[16]; - const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - for (i = 0; i < 16; ++i) { - ht.rows(input, outptr, bd); - input += 16; - outptr += 16; - } - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - ht.cols(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -// idct -void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - if (eob > 1) - vpx_highbd_idct4x4_16_add(input, dest, stride, bd); - else - vpx_highbd_idct4x4_1_add(input, dest, stride, bd); -} - - -void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - if (eob > 1) - vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); - else - vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); -} - -void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - // DC only DCT coefficient - if (eob == 1) { - vpx_highbd_idct8x8_1_add(input, dest, stride, bd); - } else if (eob <= 10) { - vpx_highbd_idct8x8_10_add(input, dest, stride, bd); - } else { - vpx_highbd_idct8x8_64_add(input, dest, stride, bd); - } -} - -void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd) { - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to separate different cases. - // DC only DCT coefficient. - if (eob == 1) { - vpx_highbd_idct16x16_1_add(input, dest, stride, bd); - } else if (eob <= 10) { - vpx_highbd_idct16x16_10_add(input, dest, stride, bd); - } else { - vpx_highbd_idct16x16_256_add(input, dest, stride, bd); - } -} - -void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd) { - // Non-zero coeff only in upper-left 8x8 - if (eob == 1) { - vpx_highbd_idct32x32_1_add(input, dest, stride, bd); - } else if (eob <= 34) { - vpx_highbd_idct32x32_34_add(input, dest, stride, bd); - } else { - vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); - } -} - -// iht -void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd) { - if (tx_type == DCT_DCT) - vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); - else - vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); -} - -void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd) { - if (tx_type == DCT_DCT) { - vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); - } else { - vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); - } -} - -void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd) { - if (tx_type == DCT_DCT) { - vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); - } else { - vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.h b/thirdparty/libvpx/vp9/common/vp9_idct.h deleted file mode 100644 index b5a3fbf362..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_idct.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_IDCT_H_ -#define VP9_COMMON_VP9_IDCT_H_ - -#include - -#include "./vpx_config.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_enums.h" -#include "vpx_dsp/inv_txfm.h" -#include "vpx_dsp/txfm_common.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); - -typedef struct { - transform_1d cols, rows; // vertical and horizontal -} transform_2d; - -#if CONFIG_VP9_HIGHBITDEPTH -typedef void (*highbd_transform_1d)(const tran_low_t*, tran_low_t*, int bd); - -typedef struct { - highbd_transform_1d cols, rows; // vertical and horizontal -} highbd_transform_2d; -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); - -void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob); -void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob); -void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd); -void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd); -void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd); -void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd); -void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd); -#endif // CONFIG_VP9_HIGHBITDEPTH -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.c b/thirdparty/libvpx/vp9/common/vp9_loopfilter.c deleted file mode 100644 index 183dec4e71..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_loopfilter.c +++ /dev/null @@ -1,1697 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_reconinter.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_seg_common.h" - -// 64 bit masks for left transform size. Each 1 represents a position where -// we should apply a loop filter across the left border of an 8x8 block -// boundary. -// -// In the case of TX_16X16-> ( in low order byte first we end up with -// a mask that looks like this -// -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// -// A loopfilter should be applied to every other 8x8 horizontally. -static const uint64_t left_64x64_txform_mask[TX_SIZES]= { - 0xffffffffffffffffULL, // TX_4X4 - 0xffffffffffffffffULL, // TX_8x8 - 0x5555555555555555ULL, // TX_16x16 - 0x1111111111111111ULL, // TX_32x32 -}; - -// 64 bit masks for above transform size. Each 1 represents a position where -// we should apply a loop filter across the top border of an 8x8 block -// boundary. -// -// In the case of TX_32x32 -> ( in low order byte first we end up with -// a mask that looks like this -// -// 11111111 -// 00000000 -// 00000000 -// 00000000 -// 11111111 -// 00000000 -// 00000000 -// 00000000 -// -// A loopfilter should be applied to every other 4 the row vertically. -static const uint64_t above_64x64_txform_mask[TX_SIZES]= { - 0xffffffffffffffffULL, // TX_4X4 - 0xffffffffffffffffULL, // TX_8x8 - 0x00ff00ff00ff00ffULL, // TX_16x16 - 0x000000ff000000ffULL, // TX_32x32 -}; - -// 64 bit masks for prediction sizes (left). Each 1 represents a position -// where left border of an 8x8 block. These are aligned to the right most -// appropriate bit, and then shifted into place. -// -// In the case of TX_16x32 -> ( low order byte first ) we end up with -// a mask that looks like this : -// -// 10000000 -// 10000000 -// 10000000 -// 10000000 -// 00000000 -// 00000000 -// 00000000 -// 00000000 -static const uint64_t left_prediction_mask[BLOCK_SIZES] = { - 0x0000000000000001ULL, // BLOCK_4X4, - 0x0000000000000001ULL, // BLOCK_4X8, - 0x0000000000000001ULL, // BLOCK_8X4, - 0x0000000000000001ULL, // BLOCK_8X8, - 0x0000000000000101ULL, // BLOCK_8X16, - 0x0000000000000001ULL, // BLOCK_16X8, - 0x0000000000000101ULL, // BLOCK_16X16, - 0x0000000001010101ULL, // BLOCK_16X32, - 0x0000000000000101ULL, // BLOCK_32X16, - 0x0000000001010101ULL, // BLOCK_32X32, - 0x0101010101010101ULL, // BLOCK_32X64, - 0x0000000001010101ULL, // BLOCK_64X32, - 0x0101010101010101ULL, // BLOCK_64X64 -}; - -// 64 bit mask to shift and set for each prediction size. -static const uint64_t above_prediction_mask[BLOCK_SIZES] = { - 0x0000000000000001ULL, // BLOCK_4X4 - 0x0000000000000001ULL, // BLOCK_4X8 - 0x0000000000000001ULL, // BLOCK_8X4 - 0x0000000000000001ULL, // BLOCK_8X8 - 0x0000000000000001ULL, // BLOCK_8X16, - 0x0000000000000003ULL, // BLOCK_16X8 - 0x0000000000000003ULL, // BLOCK_16X16 - 0x0000000000000003ULL, // BLOCK_16X32, - 0x000000000000000fULL, // BLOCK_32X16, - 0x000000000000000fULL, // BLOCK_32X32, - 0x000000000000000fULL, // BLOCK_32X64, - 0x00000000000000ffULL, // BLOCK_64X32, - 0x00000000000000ffULL, // BLOCK_64X64 -}; -// 64 bit mask to shift and set for each prediction size. A bit is set for -// each 8x8 block that would be in the left most block of the given block -// size in the 64x64 block. -static const uint64_t size_mask[BLOCK_SIZES] = { - 0x0000000000000001ULL, // BLOCK_4X4 - 0x0000000000000001ULL, // BLOCK_4X8 - 0x0000000000000001ULL, // BLOCK_8X4 - 0x0000000000000001ULL, // BLOCK_8X8 - 0x0000000000000101ULL, // BLOCK_8X16, - 0x0000000000000003ULL, // BLOCK_16X8 - 0x0000000000000303ULL, // BLOCK_16X16 - 0x0000000003030303ULL, // BLOCK_16X32, - 0x0000000000000f0fULL, // BLOCK_32X16, - 0x000000000f0f0f0fULL, // BLOCK_32X32, - 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64, - 0x00000000ffffffffULL, // BLOCK_64X32, - 0xffffffffffffffffULL, // BLOCK_64X64 -}; - -// These are used for masking the left and above borders. -static const uint64_t left_border = 0x1111111111111111ULL; -static const uint64_t above_border = 0x000000ff000000ffULL; - -// 16 bit masks for uv transform sizes. -static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= { - 0xffff, // TX_4X4 - 0xffff, // TX_8x8 - 0x5555, // TX_16x16 - 0x1111, // TX_32x32 -}; - -static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= { - 0xffff, // TX_4X4 - 0xffff, // TX_8x8 - 0x0f0f, // TX_16x16 - 0x000f, // TX_32x32 -}; - -// 16 bit left mask to shift and set for each uv prediction size. -static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { - 0x0001, // BLOCK_4X4, - 0x0001, // BLOCK_4X8, - 0x0001, // BLOCK_8X4, - 0x0001, // BLOCK_8X8, - 0x0001, // BLOCK_8X16, - 0x0001, // BLOCK_16X8, - 0x0001, // BLOCK_16X16, - 0x0011, // BLOCK_16X32, - 0x0001, // BLOCK_32X16, - 0x0011, // BLOCK_32X32, - 0x1111, // BLOCK_32X64 - 0x0011, // BLOCK_64X32, - 0x1111, // BLOCK_64X64 -}; -// 16 bit above mask to shift and set for uv each prediction size. -static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { - 0x0001, // BLOCK_4X4 - 0x0001, // BLOCK_4X8 - 0x0001, // BLOCK_8X4 - 0x0001, // BLOCK_8X8 - 0x0001, // BLOCK_8X16, - 0x0001, // BLOCK_16X8 - 0x0001, // BLOCK_16X16 - 0x0001, // BLOCK_16X32, - 0x0003, // BLOCK_32X16, - 0x0003, // BLOCK_32X32, - 0x0003, // BLOCK_32X64, - 0x000f, // BLOCK_64X32, - 0x000f, // BLOCK_64X64 -}; - -// 64 bit mask to shift and set for each uv prediction size -static const uint16_t size_mask_uv[BLOCK_SIZES] = { - 0x0001, // BLOCK_4X4 - 0x0001, // BLOCK_4X8 - 0x0001, // BLOCK_8X4 - 0x0001, // BLOCK_8X8 - 0x0001, // BLOCK_8X16, - 0x0001, // BLOCK_16X8 - 0x0001, // BLOCK_16X16 - 0x0011, // BLOCK_16X32, - 0x0003, // BLOCK_32X16, - 0x0033, // BLOCK_32X32, - 0x3333, // BLOCK_32X64, - 0x00ff, // BLOCK_64X32, - 0xffff, // BLOCK_64X64 -}; -static const uint16_t left_border_uv = 0x1111; -static const uint16_t above_border_uv = 0x000f; - -static const int mode_lf_lut[MB_MODE_COUNT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES - 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) -}; - -static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { - int lvl; - - // For each possible value for the loop filter fill out limits - for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { - // Set loop filter parameters that control sharpness. - int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); - - if (sharpness_lvl > 0) { - if (block_inside_limit > (9 - sharpness_lvl)) - block_inside_limit = (9 - sharpness_lvl); - } - - if (block_inside_limit < 1) - block_inside_limit = 1; - - memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); - memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), - SIMD_WIDTH); - } -} - -static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, - const MODE_INFO *mi) { - return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]] - [mode_lf_lut[mi->mode]]; -} - -void vp9_loop_filter_init(VP9_COMMON *cm) { - loop_filter_info_n *lfi = &cm->lf_info; - struct loopfilter *lf = &cm->lf; - int lvl; - - // init limits for given sharpness - update_sharpness(lfi, lf->sharpness_level); - lf->last_sharpness_level = lf->sharpness_level; - - // init hev threshold const vectors - for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) - memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); -} - -void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { - int seg_id; - // n_shift is the multiplier for lf_deltas - // the multiplier is 1 for when filter_lvl is between 0 and 31; - // 2 when filter_lvl is between 32 and 63 - const int scale = 1 << (default_filt_lvl >> 5); - loop_filter_info_n *const lfi = &cm->lf_info; - struct loopfilter *const lf = &cm->lf; - const struct segmentation *const seg = &cm->seg; - - // update limits if sharpness has changed - if (lf->last_sharpness_level != lf->sharpness_level) { - update_sharpness(lfi, lf->sharpness_level); - lf->last_sharpness_level = lf->sharpness_level; - } - - for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { - int lvl_seg = default_filt_lvl; - if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { - const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); - lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? - data : default_filt_lvl + data, - 0, MAX_LOOP_FILTER); - } - - if (!lf->mode_ref_delta_enabled) { - // we could get rid of this if we assume that deltas are set to - // zero when not in use; encoder always uses deltas - memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); - } else { - int ref, mode; - const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; - lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); - - for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { - for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { - const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale - + lf->mode_deltas[mode] * scale; - lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); - } - } - } - } -} - -static void filter_selectively_vert_row2(int subsampling_factor, - uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl) { - const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; - const int lfl_forward = subsampling_factor ? 4 : 8; - const unsigned int dual_one = 1 | (1 << lfl_forward); - unsigned int mask; - uint8_t *ss[2]; - ss[0] = s; - - for (mask = - (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; - mask; mask = (mask & ~dual_one) >> 1) { - if (mask & dual_one) { - const loop_filter_thresh *lfis[2]; - lfis[0] = lfthr + *lfl; - lfis[1] = lfthr + *(lfl + lfl_forward); - ss[1] = ss[0] + 8 * pitch; - - if (mask_16x16 & dual_one) { - if ((mask_16x16 & dual_one) == dual_one) { - vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, - lfis[0]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; - vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr); - } - } - - if (mask_8x8 & dual_one) { - if ((mask_8x8 & dual_one) == dual_one) { - vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, - lfis[0]->hev_thr, lfis[1]->mblim, - lfis[1]->lim, lfis[1]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; - vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } - - if (mask_4x4 & dual_one) { - if ((mask_4x4 & dual_one) == dual_one) { - vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, - lfis[0]->hev_thr, lfis[1]->mblim, - lfis[1]->lim, lfis[1]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; - vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } - - if (mask_4x4_int & dual_one) { - if ((mask_4x4_int & dual_one) == dual_one) { - vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; - vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr); - } - } - } - - ss[0] += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert_row2(int subsampling_factor, - uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl, int bd) { - const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; - const int lfl_forward = subsampling_factor ? 4 : 8; - const unsigned int dual_one = 1 | (1 << lfl_forward); - unsigned int mask; - uint16_t *ss[2]; - ss[0] = s; - - for (mask = - (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; - mask; mask = (mask & ~dual_one) >> 1) { - if (mask & dual_one) { - const loop_filter_thresh *lfis[2]; - lfis[0] = lfthr + *lfl; - lfis[1] = lfthr + *(lfl + lfl_forward); - ss[1] = ss[0] + 8 * pitch; - - if (mask_16x16 & dual_one) { - if ((mask_16x16 & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; - vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - - if (mask_8x8 & dual_one) { - if ((mask_8x8 & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; - vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - - if (mask_4x4 & dual_one) { - if ((mask_4x4 & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; - vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - - if (mask_4x4_int & dual_one) { - if ((mask_4x4_int & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; - vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, - lfi->mblim, lfi->lim, lfi->hev_thr, bd); - } - } - } - - ss[0] += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void filter_selectively_horiz(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl) { - unsigned int mask; - int count; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= count) { - count = 1; - if (mask & 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask_16x16 & 1) { - if ((mask_16x16 & 3) == 3) { - vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - count = 2; - } else { - vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } else if (mask_8x8 & 1) { - if ((mask_8x8 & 3) == 3) { - // Next block's thresholds. - const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - - if ((mask_4x4_int & 3) == 3) { - vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr); - } else { - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - else if (mask_4x4_int & 2) - vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr); - } - count = 2; - } else { - vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } else if (mask_4x4 & 1) { - if ((mask_4x4 & 3) == 3) { - // Next block's thresholds. - const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - if ((mask_4x4_int & 3) == 3) { - vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr); - } else { - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - else if (mask_4x4_int & 2) - vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr); - } - count = 2; - } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } else { - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } - s += 8 * count; - lfl += count; - mask_16x16 >>= count; - mask_8x8 >>= count; - mask_4x4 >>= count; - mask_4x4_int >>= count; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl, int bd) { - unsigned int mask; - int count; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= count) { - count = 1; - if (mask & 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask_16x16 & 1) { - if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - count = 2; - } else { - vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } else if (mask_8x8 & 1) { - if ((mask_8x8 & 3) == 3) { - // Next block's thresholds. - const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - - if ((mask_4x4_int & 3) == 3) { - vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - } else { - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } else if (mask_4x4_int & 2) { - vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } - } - count = 2; - } else { - vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - } else if (mask_4x4 & 1) { - if ((mask_4x4 & 3) == 3) { - // Next block's thresholds. - const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - if ((mask_4x4_int & 3) == 3) { - vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - } else { - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } else if (mask_4x4_int & 2) { - vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } - } - count = 2; - } else { - vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - } else { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } - s += 8 * count; - lfl += count; - mask_16x16 >>= count; - mask_8x8 >>= count; - mask_4x4 >>= count; - mask_4x4_int >>= count; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -// This function ors into the current lfm structure, where to do loop -// filters for the specific mi we are looking at. It uses information -// including the block_size_type (32x16, 32x32, etc.), the transform size, -// whether there were any coefficients encoded, and the loop filter strength -// block we are currently looking at. Shift is used to position the -// 1's we produce. -static void build_masks(const loop_filter_info_n *const lfi_n, - const MODE_INFO *mi, const int shift_y, - const int shift_uv, - LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->sb_type; - const TX_SIZE tx_size_y = mi->tx_size; - const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); - const int filter_level = get_filter_level(lfi_n, mi); - uint64_t *const left_y = &lfm->left_y[tx_size_y]; - uint64_t *const above_y = &lfm->above_y[tx_size_y]; - uint64_t *const int_4x4_y = &lfm->int_4x4_y; - uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; - int i; - - // If filter level is 0 we don't loop filter. - if (!filter_level) { - return; - } else { - const int w = num_8x8_blocks_wide_lookup[block_size]; - const int h = num_8x8_blocks_high_lookup[block_size]; - int index = shift_y; - for (i = 0; i < h; i++) { - memset(&lfm->lfl_y[index], filter_level, w); - index += 8; - } - } - - // These set 1 in the current block size for the block size edges. - // For instance if the block size is 32x16, we'll set: - // above = 1111 - // 0000 - // and - // left = 1000 - // = 1000 - // NOTE : In this example the low bit is left most ( 1000 ) is stored as - // 1, not 8... - // - // U and V set things on a 16 bit scale. - // - *above_y |= above_prediction_mask[block_size] << shift_y; - *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; - *left_y |= left_prediction_mask[block_size] << shift_y; - *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; - - // If the block has no coefficients and is not intra we skip applying - // the loop filter on block edges. - if (mi->skip && is_inter_block(mi)) - return; - - // Here we are adding a mask for the transform size. The transform - // size mask is set to be correct for a 64x64 prediction block size. We - // mask to match the size of the block we are working on and then shift it - // into place.. - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - *above_uv |= (size_mask_uv[block_size] & - above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - *left_uv |= (size_mask_uv[block_size] & - left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - - // Here we are trying to determine what to do with the internal 4x4 block - // boundaries. These differ from the 4x4 boundaries on the outside edge of - // an 8x8 in that the internal ones can be skipped and don't depend on - // the prediction block size. - if (tx_size_y == TX_4X4) - *int_4x4_y |= size_mask[block_size] << shift_y; - - if (tx_size_uv == TX_4X4) - *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; -} - -// This function does the same thing as the one above with the exception that -// it only affects the y masks. It exists because for blocks < 16x16 in size, -// we only update u and v masks on the first block. -static void build_y_mask(const loop_filter_info_n *const lfi_n, - const MODE_INFO *mi, const int shift_y, - LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->sb_type; - const TX_SIZE tx_size_y = mi->tx_size; - const int filter_level = get_filter_level(lfi_n, mi); - uint64_t *const left_y = &lfm->left_y[tx_size_y]; - uint64_t *const above_y = &lfm->above_y[tx_size_y]; - uint64_t *const int_4x4_y = &lfm->int_4x4_y; - int i; - - if (!filter_level) { - return; - } else { - const int w = num_8x8_blocks_wide_lookup[block_size]; - const int h = num_8x8_blocks_high_lookup[block_size]; - int index = shift_y; - for (i = 0; i < h; i++) { - memset(&lfm->lfl_y[index], filter_level, w); - index += 8; - } - } - - *above_y |= above_prediction_mask[block_size] << shift_y; - *left_y |= left_prediction_mask[block_size] << shift_y; - - if (mi->skip && is_inter_block(mi)) - return; - - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - - if (tx_size_y == TX_4X4) - *int_4x4_y |= size_mask[block_size] << shift_y; -} - -void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, - const int mi_col, LOOP_FILTER_MASK *lfm) { - int i; - - // The largest loopfilter we have is 16x16 so we use the 16x16 mask - // for 32x32 transforms also. - lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; - lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; - lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; - lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; - - // We do at least 8 tap filter on every 32x32 even if the transform size - // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and - // remove it from the 4x4. - lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; - lfm->left_y[TX_4X4] &= ~left_border; - lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; - lfm->above_y[TX_4X4] &= ~above_border; - lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; - lfm->left_uv[TX_4X4] &= ~left_border_uv; - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; - lfm->above_uv[TX_4X4] &= ~above_border_uv; - - // We do some special edge handling. - if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { - const uint64_t rows = cm->mi_rows - mi_row; - - // Each pixel inside the border gets a 1, - const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); - const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); - - // Remove values completely outside our border. - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= mask_y; - lfm->above_y[i] &= mask_y; - lfm->left_uv[i] &= mask_uv; - lfm->above_uv[i] &= mask_uv; - } - lfm->int_4x4_y &= mask_y; - lfm->int_4x4_uv &= mask_uv; - - // We don't apply a wide loop filter on the last uv block row. If set - // apply the shorter one instead. - if (rows == 1) { - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; - lfm->above_uv[TX_16X16] = 0; - } - if (rows == 5) { - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; - lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); - } - } - - if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { - const uint64_t columns = cm->mi_cols - mi_col; - - // Each pixel inside the border gets a 1, the multiply copies the border - // to where we need it. - const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; - const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; - - // Internal edges are not applied on the last column of the image so - // we mask 1 more for the internal edges - const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; - - // Remove the bits outside the image edge. - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= mask_y; - lfm->above_y[i] &= mask_y; - lfm->left_uv[i] &= mask_uv; - lfm->above_uv[i] &= mask_uv; - } - lfm->int_4x4_y &= mask_y; - lfm->int_4x4_uv &= mask_uv_int; - - // We don't apply a wide loop filter on the last uv column. If set - // apply the shorter one instead. - if (columns == 1) { - lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; - lfm->left_uv[TX_16X16] = 0; - } - if (columns == 5) { - lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); - lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); - } - } - // We don't apply a loop filter on the first column in the image, mask that - // out. - if (mi_col == 0) { - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= 0xfefefefefefefefeULL; - lfm->left_uv[i] &= 0xeeee; - } - } - - // Assert if we try to apply 2 different loop filters at the same position. - assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); - assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); - assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); - assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); - assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); - assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); - assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); - assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); - assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); - assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); - assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); - assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); - assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); - assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); - assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); - assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); -} - -// This function sets up the bit masks for the entire 64x64 region represented -// by mi_row, mi_col. -void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi, const int mode_info_stride, - LOOP_FILTER_MASK *lfm) { - int idx_32, idx_16, idx_8; - const loop_filter_info_n *const lfi_n = &cm->lf_info; - MODE_INFO **mip = mi; - MODE_INFO **mip2 = mi; - - // These are offsets to the next mi in the 64x64 block. It is what gets - // added to the mi ptr as we go through each loop. It helps us to avoid - // setting up special row and column counters for each index. The last step - // brings us out back to the starting position. - const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, - -(mode_info_stride << 2) - 4}; - const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, - -(mode_info_stride << 1) - 2}; - const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; - - // Following variables represent shifts to position the current block - // mask over the appropriate block. A shift of 36 to the left will move - // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left - // 4 rows to the appropriate spot. - const int shift_32_y[] = {0, 4, 32, 36}; - const int shift_16_y[] = {0, 2, 16, 18}; - const int shift_8_y[] = {0, 1, 8, 9}; - const int shift_32_uv[] = {0, 2, 8, 10}; - const int shift_16_uv[] = {0, 1, 4, 5}; - const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? - cm->mi_rows - mi_row : MI_BLOCK_SIZE); - const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? - cm->mi_cols - mi_col : MI_BLOCK_SIZE); - - vp9_zero(*lfm); - assert(mip[0] != NULL); - - switch (mip[0]->sb_type) { - case BLOCK_64X64: - build_masks(lfi_n, mip[0] , 0, 0, lfm); - break; - case BLOCK_64X32: - build_masks(lfi_n, mip[0], 0, 0, lfm); - mip2 = mip + mode_info_stride * 4; - if (4 >= max_rows) - break; - build_masks(lfi_n, mip2[0], 32, 8, lfm); - break; - case BLOCK_32X64: - build_masks(lfi_n, mip[0], 0, 0, lfm); - mip2 = mip + 4; - if (4 >= max_cols) - break; - build_masks(lfi_n, mip2[0], 4, 2, lfm); - break; - default: - for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { - const int shift_y = shift_32_y[idx_32]; - const int shift_uv = shift_32_uv[idx_32]; - const int mi_32_col_offset = ((idx_32 & 1) << 2); - const int mi_32_row_offset = ((idx_32 >> 1) << 2); - if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) - continue; - switch (mip[0]->sb_type) { - case BLOCK_32X32: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - break; - case BLOCK_32X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_32_row_offset + 2 >= max_rows) - continue; - mip2 = mip + mode_info_stride * 2; - build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); - break; - case BLOCK_16X32: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_32_col_offset + 2 >= max_cols) - continue; - mip2 = mip + 2; - build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); - break; - default: - for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { - const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; - const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; - const int mi_16_col_offset = mi_32_col_offset + - ((idx_16 & 1) << 1); - const int mi_16_row_offset = mi_32_row_offset + - ((idx_16 >> 1) << 1); - - if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) - continue; - - switch (mip[0]->sb_type) { - case BLOCK_16X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - break; - case BLOCK_16X8: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_16_row_offset + 1 >= max_rows) - continue; - mip2 = mip + mode_info_stride; - build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); - break; - case BLOCK_8X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_16_col_offset +1 >= max_cols) - continue; - mip2 = mip + 1; - build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); - break; - default: { - const int shift_y = shift_32_y[idx_32] + - shift_16_y[idx_16] + - shift_8_y[0]; - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - mip += offset[0]; - for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { - const int shift_y = shift_32_y[idx_32] + - shift_16_y[idx_16] + - shift_8_y[idx_8]; - const int mi_8_col_offset = mi_16_col_offset + - ((idx_8 & 1)); - const int mi_8_row_offset = mi_16_row_offset + - ((idx_8 >> 1)); - - if (mi_8_col_offset >= max_cols || - mi_8_row_offset >= max_rows) - continue; - build_y_mask(lfi_n, mip[0], shift_y, lfm); - } - break; - } - } - } - break; - } - } - break; - } -} - -static void filter_selectively_vert(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl) { - unsigned int mask; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask & 1) { - if (mask_16x16 & 1) { - vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } else if (mask_8x8 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } - } - if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - s += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert(uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl, int bd) { - unsigned int mask; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask & 1) { - if (mask_16x16 & 1) { - vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } else if (mask_8x8 & 1) { - vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } else if (mask_4x4 & 1) { - vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } - if (mask_4x4_int & 1) - vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - s += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_filter_block_plane_non420(VP9_COMMON *cm, - struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, - int mi_row, int mi_col) { - const int ss_x = plane->subsampling_x; - const int ss_y = plane->subsampling_y; - const int row_step = 1 << ss_y; - const int col_step = 1 << ss_x; - const int row_step_stride = cm->mi_stride * row_step; - struct buf_2d *const dst = &plane->dst; - uint8_t* const dst0 = dst->buf; - unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; - unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; - unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; - unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; - uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; - int r, c; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { - unsigned int mask_16x16_c = 0; - unsigned int mask_8x8_c = 0; - unsigned int mask_4x4_c = 0; - unsigned int border_mask; - - // Determine the vertical edges that need filtering - for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { - const MODE_INFO *mi = mi_8x8[c]; - const BLOCK_SIZE sb_type = mi[0].sb_type; - const int skip_this = mi[0].skip && is_inter_block(mi); - // left edge of current unit is block/partition edge -> no skip - const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? - !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; - const int skip_this_c = skip_this && !block_edge_left; - // top edge of current unit is block/partition edge -> no skip - const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? - !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; - const int skip_this_r = skip_this && !block_edge_above; - const TX_SIZE tx_size = get_uv_tx_size(mi, plane); - const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; - const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; - - // Filter level can vary per MI - if (!(lfl[(r << 3) + (c >> ss_x)] = - get_filter_level(&cm->lf_info, mi))) - continue; - - // Build masks based on the transform size of each block - if (tx_size == TX_32X32) { - if (!skip_this_c && ((c >> ss_x) & 3) == 0) { - if (!skip_border_4x4_c) - mask_16x16_c |= 1 << (c >> ss_x); - else - mask_8x8_c |= 1 << (c >> ss_x); - } - if (!skip_this_r && ((r >> ss_y) & 3) == 0) { - if (!skip_border_4x4_r) - mask_16x16[r] |= 1 << (c >> ss_x); - else - mask_8x8[r] |= 1 << (c >> ss_x); - } - } else if (tx_size == TX_16X16) { - if (!skip_this_c && ((c >> ss_x) & 1) == 0) { - if (!skip_border_4x4_c) - mask_16x16_c |= 1 << (c >> ss_x); - else - mask_8x8_c |= 1 << (c >> ss_x); - } - if (!skip_this_r && ((r >> ss_y) & 1) == 0) { - if (!skip_border_4x4_r) - mask_16x16[r] |= 1 << (c >> ss_x); - else - mask_8x8[r] |= 1 << (c >> ss_x); - } - } else { - // force 8x8 filtering on 32x32 boundaries - if (!skip_this_c) { - if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) - mask_8x8_c |= 1 << (c >> ss_x); - else - mask_4x4_c |= 1 << (c >> ss_x); - } - - if (!skip_this_r) { - if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) - mask_8x8[r] |= 1 << (c >> ss_x); - else - mask_4x4[r] |= 1 << (c >> ss_x); - } - - if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) - mask_4x4_int[r] |= 1 << (c >> ss_x); - } - } - - // Disable filtering on the leftmost column - border_mask = ~(mi_col == 0); -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - cm->lf_info.lfthr, &lfl[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - cm->lf_info.lfthr, &lfl[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mi_8x8 += row_step_stride; - } - - // Now do horizontal pass - dst->buf = dst0; - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { - const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; - const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; - - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16[r]; - mask_8x8_r = mask_8x8[r]; - mask_4x4_r = mask_4x4[r]; - } -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - cm->lf_info.lfthr, &lfl[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - cm->lf_info.lfthr, &lfl[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - } -} - -void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { - struct buf_2d *const dst = &plane->dst; - uint8_t *const dst0 = dst->buf; - int r; - uint64_t mask_16x16 = lfm->left_y[TX_16X16]; - uint64_t mask_8x8 = lfm->left_y[TX_8X8]; - uint64_t mask_4x4 = lfm->left_y[TX_4X4]; - uint64_t mask_4x4_int = lfm->int_4x4_y; - - assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); - - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - // Disable filtering on the leftmost column. -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->subsampling_x, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, - &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 16 * dst->stride; - mask_16x16 >>= 16; - mask_8x8 >>= 16; - mask_4x4 >>= 16; - mask_4x4_int >>= 16; - } - - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_y[TX_16X16]; - mask_8x8 = lfm->above_y[TX_8X8]; - mask_4x4 = lfm->above_y[TX_4X4]; - mask_4x4_int = lfm->int_4x4_y; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xff; - mask_8x8_r = mask_8x8 & 0xff; - mask_4x4_r = mask_4x4 & 0xff; - } - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, - cm->lf_info.lfthr, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, - cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 8 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; - } -} - -void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { - struct buf_2d *const dst = &plane->dst; - uint8_t *const dst0 = dst->buf; - int r, c; - uint8_t lfl_uv[16]; - - uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; - uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; - uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; - uint16_t mask_4x4_int = lfm->int_4x4_uv; - - assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); - - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; - lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; - } - - // Disable filtering on the leftmost column. -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->subsampling_x, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, &lfl_uv[r << 1], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, &lfl_uv[r << 1]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; - } - - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_uv[TX_16X16]; - mask_8x8 = lfm->above_uv[TX_8X8]; - mask_4x4 = lfm->above_uv[TX_4X4]; - mask_4x4_int = lfm->int_4x4_uv; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; - const unsigned int mask_4x4_int_r = - skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf); - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xf; - mask_8x8_r = mask_8x8 & 0xf; - mask_4x4_r = mask_4x4 & 0xf; - } - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, - cm->lf_info.lfthr, &lfl_uv[r << 1], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, - &lfl_uv[r << 1]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 8 * dst->stride; - mask_16x16 >>= 4; - mask_8x8 >>= 4; - mask_4x4 >>= 4; - mask_4x4_int >>= 4; - } -} - -static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only) { - const int num_planes = y_only ? 1 : MAX_MB_PLANE; - enum lf_path path; - int mi_row, mi_col; - - if (y_only) - path = LF_PATH_444; - else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) - path = LF_PATH_420; - else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) - path = LF_PATH_444; - else - path = LF_PATH_SLOW; - - for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); - - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { - int plane; - - vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - - // TODO(jimbankoski): For 444 only need to do y mask. - vp9_adjust_mask(cm, mi_row, mi_col, lfm); - - vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); - for (plane = 1; plane < num_planes; ++plane) { - switch (path) { - case LF_PATH_420: - vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_444: - vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_SLOW: - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); - break; - } - } - } - } -} - -void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, - VP9_COMMON *cm, MACROBLOCKD *xd, - int frame_filter_level, - int y_only, int partial_frame) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - if (!frame_filter_level) return; - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); -} - -// Used by the encoder to build the loopfilter masks. -// TODO(slavarnway): Do the encoder the same way the decoder does it and -// build the masks in line as part of the encode process. -void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, - int partial_frame) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - int mi_col, mi_row; - if (!frame_filter_level) return; - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - - vp9_loop_filter_frame_init(cm, frame_filter_level); - - for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - // vp9_setup_mask() zeros lfm - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - get_lfm(&cm->lf, mi_row, mi_col)); - } - } -} - -// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 -// or greater area. -static const uint8_t first_block_in_16x16[8][8] = { - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0} -}; - -// This function sets up the bit masks for a block represented -// by mi_row, mi_col in a 64x64 region. -// TODO(SJL): This function only works for yv12. -void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, - int mi_col, int bw, int bh) { - const BLOCK_SIZE block_size = mi->sb_type; - const TX_SIZE tx_size_y = mi->tx_size; - const loop_filter_info_n *const lfi_n = &cm->lf_info; - const int filter_level = get_filter_level(lfi_n, mi); - const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); - LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); - uint64_t *const left_y = &lfm->left_y[tx_size_y]; - uint64_t *const above_y = &lfm->above_y[tx_size_y]; - uint64_t *const int_4x4_y = &lfm->int_4x4_y; - uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; - const int row_in_sb = (mi_row & 7); - const int col_in_sb = (mi_col & 7); - const int shift_y = col_in_sb + (row_in_sb << 3); - const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); - const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; - - if (!filter_level) { - return; - } else { - int index = shift_y; - int i; - for (i = 0; i < bh; i++) { - memset(&lfm->lfl_y[index], filter_level, bw); - index += 8; - } - } - - // These set 1 in the current block size for the block size edges. - // For instance if the block size is 32x16, we'll set: - // above = 1111 - // 0000 - // and - // left = 1000 - // = 1000 - // NOTE : In this example the low bit is left most ( 1000 ) is stored as - // 1, not 8... - // - // U and V set things on a 16 bit scale. - // - *above_y |= above_prediction_mask[block_size] << shift_y; - *left_y |= left_prediction_mask[block_size] << shift_y; - - if (build_uv) { - *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; - *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; - } - - // If the block has no coefficients and is not intra we skip applying - // the loop filter on block edges. - if (mi->skip && is_inter_block(mi)) - return; - - // Add a mask for the transform size. The transform size mask is set to - // be correct for a 64x64 prediction block size. Mask to match the size of - // the block we are working on and then shift it into place. - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - - if (build_uv) { - *above_uv |= (size_mask_uv[block_size] & - above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - - *left_uv |= (size_mask_uv[block_size] & - left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - } - - // Try to determine what to do with the internal 4x4 block boundaries. These - // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the - // internal ones can be skipped and don't depend on the prediction block size. - if (tx_size_y == TX_4X4) - *int_4x4_y |= size_mask[block_size] << shift_y; - - if (build_uv && tx_size_uv == TX_4X4) - *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; -} - -void vp9_loop_filter_data_reset( - LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, - struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { - lf_data->frame_buffer = frame_buffer; - lf_data->cm = cm; - lf_data->start = 0; - lf_data->stop = 0; - lf_data->y_only = 0; - memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); -} - -void vp9_reset_lfm(VP9_COMMON *const cm) { - if (cm->lf.filter_level) { - memset(cm->lf.lfm, 0, - ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * - sizeof(*cm->lf.lfm)); - } -} - -int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { - (void)unused; - loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only); - return 1; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.h b/thirdparty/libvpx/vp9/common/vp9_loopfilter.h deleted file mode 100644 index fca8830fa1..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_loopfilter.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_LOOPFILTER_H_ -#define VP9_COMMON_VP9_LOOPFILTER_H_ - -#include "vpx_ports/mem.h" -#include "./vpx_config.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_seg_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_LOOP_FILTER 63 -#define MAX_SHARPNESS 7 - -#define SIMD_WIDTH 16 - -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 2 - -enum lf_path { - LF_PATH_420, - LF_PATH_444, - LF_PATH_SLOW, -}; - -// Need to align this structure so when it is declared and -// passed it can be loaded into vector registers. -typedef struct { - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); -} loop_filter_thresh; - -typedef struct { - loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; - uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; -} loop_filter_info_n; - -// This structure holds bit masks for all 8x8 blocks in a 64x64 region. -// Each 1 bit represents a position in which we want to apply the loop filter. -// Left_ entries refer to whether we apply a filter on the border to the -// left of the block. Above_ entries refer to whether or not to apply a -// filter on the above border. Int_ entries refer to whether or not to -// apply borders on the 4x4 edges within the 8x8 block that each bit -// represents. -// Since each transform is accompanied by a potentially different type of -// loop filter there is a different entry in the array for each transform size. -typedef struct { - uint64_t left_y[TX_SIZES]; - uint64_t above_y[TX_SIZES]; - uint64_t int_4x4_y; - uint16_t left_uv[TX_SIZES]; - uint16_t above_uv[TX_SIZES]; - uint16_t int_4x4_uv; - uint8_t lfl_y[64]; -} LOOP_FILTER_MASK; - -struct loopfilter { - int filter_level; - int last_filt_level; - - int sharpness_level; - int last_sharpness_level; - - uint8_t mode_ref_delta_enabled; - uint8_t mode_ref_delta_update; - - // 0 = Intra, Last, GF, ARF - signed char ref_deltas[MAX_REF_LF_DELTAS]; - signed char last_ref_deltas[MAX_REF_LF_DELTAS]; - - // 0 = ZERO_MV, MV - signed char mode_deltas[MAX_MODE_LF_DELTAS]; - signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; - - LOOP_FILTER_MASK *lfm; - int lfm_stride; -}; - -/* assorted loopfilter functions which get used elsewhere */ -struct VP9Common; -struct macroblockd; -struct VP9LfSyncData; - -// This function sets up the bit masks for the entire 64x64 region represented -// by mi_row, mi_col. -void vp9_setup_mask(struct VP9Common *const cm, - const int mi_row, const int mi_col, - MODE_INFO **mi_8x8, const int mode_info_stride, - LOOP_FILTER_MASK *lfm); - -void vp9_filter_block_plane_ss00(struct VP9Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); - -void vp9_filter_block_plane_ss11(struct VP9Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); - -void vp9_filter_block_plane_non420(struct VP9Common *cm, - struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, - int mi_row, int mi_col); - -void vp9_loop_filter_init(struct VP9Common *cm); - -// Update the loop filter for the current frame. -// This should be called before vp9_loop_filter_frame(), vp9_build_mask_frame() -// calls this function directly. -void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); - -void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, - struct VP9Common *cm, - struct macroblockd *mbd, - int filter_level, - int y_only, int partial_frame); - -// Get the superblock lfm for a given mi_row, mi_col. -static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, - const int mi_row, const int mi_col) { - return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; -} - -void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, - int mi_col, int bw, int bh); -void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, - const int mi_col, LOOP_FILTER_MASK *lfm); -void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, - int partial_frame); -void vp9_reset_lfm(struct VP9Common *const cm); - -typedef struct LoopFilterWorkerData { - YV12_BUFFER_CONFIG *frame_buffer; - struct VP9Common *cm; - struct macroblockd_plane planes[MAX_MB_PLANE]; - - int start; - int stop; - int y_only; -} LFWorkerData; - -void vp9_loop_filter_data_reset( - LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, - struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]); - -// Operates on the rows described by 'lf_data'. -int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mv.h b/thirdparty/libvpx/vp9/common/vp9_mv.h deleted file mode 100644 index 5d89da8c25..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_mv.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_MV_H_ -#define VP9_COMMON_VP9_MV_H_ - -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct mv { - int16_t row; - int16_t col; -} MV; - -typedef union int_mv { - uint32_t as_int; - MV as_mv; -} int_mv; /* facilitates faster equality tests and copies */ - -typedef struct mv32 { - int32_t row; - int32_t col; -} MV32; - -static INLINE int is_zero_mv(const MV *mv) { - return *((const uint32_t *)mv) == 0; -} - -static INLINE int is_equal_mv(const MV *a, const MV *b) { - return *((const uint32_t *)a) == *((const uint32_t *)b); -} - -static INLINE void clamp_mv(MV *mv, int min_col, int max_col, - int min_row, int max_row) { - mv->col = clamp(mv->col, min_col, max_col); - mv->row = clamp(mv->row, min_row, max_row); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_MV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.c b/thirdparty/libvpx/vp9/common/vp9_mvref_common.c deleted file mode 100644 index 0eb01a51ba..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_mvref_common.c +++ /dev/null @@ -1,201 +0,0 @@ - -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_mvref_common.h" - -// This function searches the neighborhood of a given MB/SB -// to try and find candidate reference vectors. -static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int block, int mi_row, int mi_col, - uint8_t *mode_context) { - const int *ref_sign_bias = cm->ref_frame_sign_bias; - int i, refmv_count = 0; - const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; - int different_ref_found = 0; - int context_counter = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? - cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; - const TileInfo *const tile = &xd->tile; - - // Blank the reference vector list - memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); - - // The nearest 2 blocks are treated differently - // if the size < 8x8 we get the mv from the bmi substructure, - // and we also need to keep a mode count. - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]; - // Keep counts for entropy encoding. - context_counter += mode_2_counter[candidate_mi->mode]; - different_ref_found = 1; - - if (candidate_mi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - else if (candidate_mi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - } - } - - // Check the rest of the neighbors in much the same way - // as before except we don't need to keep track of sub blocks or - // mode counts. - for (; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - different_ref_found = 1; - - if (candidate_mi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); - else if (candidate_mi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // Check the last frame's mode and mv info. - if (cm->use_prev_frame_mvs) { - if (prev_frame_mvs->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); - } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { - ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // Since we couldn't find 2 mvs from the same reference frame - // go back through the neighbors and find motion vectors from - // different reference frames. - if (different_ref_found) { - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - - // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, - refmv_count, mv_ref_list, Done); - } - } - } - - // Since we still don't have a candidate we'll try the last frame. - if (cm->use_prev_frame_mvs) { - if (prev_frame_mvs->ref_frame[0] != ref_frame && - prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { - int_mv mv = prev_frame_mvs->mv[0]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); - } - - if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && - prev_frame_mvs->ref_frame[1] != ref_frame && - prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { - int_mv mv = prev_frame_mvs->mv[1]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); - } - } - - Done: - - mode_context[ref_frame] = counter_to_context[context_counter]; - - // Clamp vectors - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) - clamp_mv_ref(&mv_ref_list[i].as_mv, xd); -} - -void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col, - uint8_t *mode_context) { - find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, mode_context); -} - -void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, - int_mv *mvlist, int_mv *nearest_mv, - int_mv *near_mv) { - int i; - // Make sure all the candidates are properly clamped etc - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp); - clamp_mv2(&mvlist[i].as_mv, xd); - } - *nearest_mv = mvlist[0]; - *near_mv = mvlist[1]; -} - -void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv, - uint8_t *mode_context) { - int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi[0]; - b_mode_info *bmi = mi->bmi; - int n; - - assert(MAX_MV_REF_CANDIDATES == 2); - - find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block, - mi_row, mi_col, mode_context); - - near_mv->as_int = 0; - switch (block) { - case 0: - nearest_mv->as_int = mv_list[0].as_int; - near_mv->as_int = mv_list[1].as_int; - break; - case 1: - case 2: - nearest_mv->as_int = bmi[0].as_mv[ref].as_int; - for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n) - if (nearest_mv->as_int != mv_list[n].as_int) { - near_mv->as_int = mv_list[n].as_int; - break; - } - break; - case 3: { - int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; - candidates[0] = bmi[1].as_mv[ref]; - candidates[1] = bmi[0].as_mv[ref]; - candidates[2] = mv_list[0]; - candidates[3] = mv_list[1]; - - nearest_mv->as_int = bmi[2].as_mv[ref].as_int; - for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) - if (nearest_mv->as_int != candidates[n].as_int) { - near_mv->as_int = candidates[n].as_int; - break; - } - break; - } - default: - assert(0 && "Invalid block index."); - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.h b/thirdparty/libvpx/vp9/common/vp9_mvref_common.h deleted file mode 100644 index 4380843e24..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_mvref_common.h +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ -#define VP9_COMMON_VP9_MVREF_COMMON_H_ - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) -#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\ - VP9_INTERP_EXTEND) << 3) - -#define MVREF_NEIGHBOURS 8 - -typedef struct position { - int row; - int col; -} POSITION; - -typedef enum { - BOTH_ZERO = 0, - ZERO_PLUS_PREDICTED = 1, - BOTH_PREDICTED = 2, - NEW_PLUS_NON_INTRA = 3, - BOTH_NEW = 4, - INTRA_PLUS_NON_INTRA = 5, - BOTH_INTRA = 6, - INVALID_CASE = 9 -} motion_vector_context; - -// This is used to figure out a context for the ref blocks. The code flattens -// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by -// adding 9 for each intra block, 3 for each zero mv and 1 for each new -// motion vector. This single number is then converted into a context -// with a single lookup ( counter_to_context ). -static const int mode_2_counter[MB_MODE_COUNT] = { - 9, // DC_PRED - 9, // V_PRED - 9, // H_PRED - 9, // D45_PRED - 9, // D135_PRED - 9, // D117_PRED - 9, // D153_PRED - 9, // D207_PRED - 9, // D63_PRED - 9, // TM_PRED - 0, // NEARESTMV - 0, // NEARMV - 3, // ZEROMV - 1, // NEWMV -}; - -// There are 3^3 different combinations of 3 counts that can be either 0,1 or -// 2. However the actual count can never be greater than 2 so the highest -// counter we need is 18. 9 is an invalid counter that's never used. -static const int counter_to_context[19] = { - BOTH_PREDICTED, // 0 - NEW_PLUS_NON_INTRA, // 1 - BOTH_NEW, // 2 - ZERO_PLUS_PREDICTED, // 3 - NEW_PLUS_NON_INTRA, // 4 - INVALID_CASE, // 5 - BOTH_ZERO, // 6 - INVALID_CASE, // 7 - INVALID_CASE, // 8 - INTRA_PLUS_NON_INTRA, // 9 - INTRA_PLUS_NON_INTRA, // 10 - INVALID_CASE, // 11 - INTRA_PLUS_NON_INTRA, // 12 - INVALID_CASE, // 13 - INVALID_CASE, // 14 - INVALID_CASE, // 15 - INVALID_CASE, // 16 - INVALID_CASE, // 17 - BOTH_INTRA // 18 -}; - -static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { - // 4X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 4X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X16 - {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, - // 16X8 - {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, - // 16X16 - {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 16X32 - {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, - // 32X16 - {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 32X32 - {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 32X64 - {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, - // 64X32 - {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, - // 64X64 - {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} -}; - -static const int idx_n_column_to_subblock[4][2] = { - {1, 2}, - {1, 3}, - {3, 2}, - {3, 3} -}; - -// clamp_mv_ref -#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units - -static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { - clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, - xd->mb_to_right_edge + MV_BORDER, - xd->mb_to_top_edge - MV_BORDER, - xd->mb_to_bottom_edge + MV_BORDER); -} - -// This function returns either the appropriate sub block or block's mv -// on whether the block_size < 8x8 and we have check_sub_blocks set. -static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, - int search_col, int block_idx) { - return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 - ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] - .as_mv[which_mv] - : candidate->mv[which_mv]; -} - - -// Performs mv sign inversion if indicated by the reference frame combination. -static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, - const MV_REFERENCE_FRAME this_ref_frame, - const int *ref_sign_bias) { - int_mv mv = mi->mv[ref]; - if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - return mv; -} - -// This macro is used to add a motion vector mv_ref list if it isn't -// already in the list. If it's the second motion vector it will also -// skip all additional processing and jump to Done! -#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ - do { \ - if (refmv_count) { \ - if ((mv).as_int != (mv_ref_list)[0].as_int) { \ - (mv_ref_list)[(refmv_count)] = (mv); \ - goto Done; \ - } \ - } else { \ - (mv_ref_list)[(refmv_count)++] = (mv); \ - } \ - } while (0) - -// If either reference frame is different, not INTRA, and they -// are different from each other scale and add the mv to our list. -#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \ - mv_ref_list, Done) \ - do { \ - if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - if (has_second_ref(mbmi) && \ - (mbmi)->ref_frame[1] != ref_frame && \ - (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - } \ - } while (0) - - -// Checks that the given mi_row, mi_col and search point -// are inside the borders of the tile. -static INLINE int is_inside(const TileInfo *const tile, - int mi_col, int mi_row, int mi_rows, - const POSITION *mi_pos) { - return !(mi_row + mi_pos->row < 0 || - mi_col + mi_pos->col < tile->mi_col_start || - mi_row + mi_pos->row >= mi_rows || - mi_col + mi_pos->col >= tile->mi_col_end); -} - -// TODO(jingning): this mv clamping function should be block size dependent. -static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { - clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); -} - -static INLINE void lower_mv_precision(MV *mv, int allow_hp) { - const int use_hp = allow_hp && use_mv_hp(mv); - if (!use_hp) { - if (mv->row & 1) - mv->row += (mv->row > 0 ? -1 : 1); - if (mv->col & 1) - mv->col += (mv->col > 0 ? -1 : 1); - } -} - -typedef void (*find_mv_refs_sync)(void *const data, int mi_row); -void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int mi_row, int mi_col, - uint8_t *mode_context); - -// check a list of motion vectors by sad score using a number rows of pixels -// above and a number cols of pixels in the left to select the one with best -// score to use as ref motion vector -void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, - int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv); - -void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv, - uint8_t *mode_context); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h b/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h deleted file mode 100644 index 3fd935e628..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ONYXC_INT_H_ -#define VP9_COMMON_VP9_ONYXC_INT_H_ - -#include "./vpx_config.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx_util/vpx_thread.h" -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_frame_buffers.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_tile_common.h" - -#if CONFIG_VP9_POSTPROC -#include "vp9/common/vp9_postproc.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define REFS_PER_FRAME 3 - -#define REF_FRAMES_LOG2 3 -#define REF_FRAMES (1 << REF_FRAMES_LOG2) - -// 4 scratch frames for the new frames to support a maximum of 4 cores decoding -// in parallel, 3 for scaled references on the encoder. -// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number -// of framebuffers. -// TODO(jkoleszar): These 3 extra references could probably come from the -// normal reference pool. -#define FRAME_BUFFERS (REF_FRAMES + 7) - -#define FRAME_CONTEXTS_LOG2 2 -#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) - -#define NUM_PING_PONG_BUFFERS 2 - -extern const struct { - PARTITION_CONTEXT above; - PARTITION_CONTEXT left; -} partition_context_lookup[BLOCK_SIZES]; - - -typedef enum { - SINGLE_REFERENCE = 0, - COMPOUND_REFERENCE = 1, - REFERENCE_MODE_SELECT = 2, - REFERENCE_MODES = 3, -} REFERENCE_MODE; - -typedef struct { - int_mv mv[2]; - MV_REFERENCE_FRAME ref_frame[2]; -} MV_REF; - -typedef struct { - int ref_count; - MV_REF *mvs; - int mi_rows; - int mi_cols; - vpx_codec_frame_buffer_t raw_frame_buffer; - YV12_BUFFER_CONFIG buf; - - // The Following variables will only be used in frame parallel decode. - - // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means - // that no FrameWorker owns, or is decoding, this buffer. - VPxWorker *frame_worker_owner; - - // row and col indicate which position frame has been decoded to in real - // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX - // when the frame is fully decoded. - int row; - int col; -} RefCntBuffer; - -typedef struct BufferPool { - // Protect BufferPool from being accessed by several FrameWorkers at - // the same time during frame parallel decode. - // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. -#if CONFIG_MULTITHREAD - pthread_mutex_t pool_mutex; -#endif - - // Private data associated with the frame buffer callbacks. - void *cb_priv; - - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - RefCntBuffer frame_bufs[FRAME_BUFFERS]; - - // Frame buffers allocated internally by the codec. - InternalFrameBufferList int_frame_buffers; -} BufferPool; - -typedef struct VP9Common { - struct vpx_internal_error_info error; - vpx_color_space_t color_space; - vpx_color_range_t color_range; - int width; - int height; - int render_width; - int render_height; - int last_width; - int last_height; - - // TODO(jkoleszar): this implies chroma ss right now, but could vary per - // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to - // support additional planes. - int subsampling_x; - int subsampling_y; - -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth; // Marks if we need to use 16bit frame buffers. -#endif - - YV12_BUFFER_CONFIG *frame_to_show; - RefCntBuffer *prev_frame; - - // TODO(hkuang): Combine this with cur_buf in macroblockd. - RefCntBuffer *cur_frame; - - int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ - - // Prepare ref_frame_map for the next frame. - // Only used in frame parallel decode. - int next_ref_frame_map[REF_FRAMES]; - - // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and - // roll new_fb_idx into it. - - // Each frame can reference REFS_PER_FRAME buffers - RefBuffer frame_refs[REFS_PER_FRAME]; - - int new_fb_idx; - -#if CONFIG_VP9_POSTPROC - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; -#endif - - FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ - FRAME_TYPE frame_type; - - int show_frame; - int last_show_frame; - int show_existing_frame; - - // Flag signaling that the frame is encoded using only INTRA modes. - uint8_t intra_only; - uint8_t last_intra_only; - - int allow_high_precision_mv; - - // Flag signaling that the frame context should be reset to default values. - // 0 or 1 implies don't reset, 2 reset just the context specified in the - // frame header, 3 reset all contexts. - int reset_frame_context; - - // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in - // MODE_INFO (8-pixel) units. - int MBs; - int mb_rows, mi_rows; - int mb_cols, mi_cols; - int mi_stride; - - /* profile settings */ - TX_MODE tx_mode; - - int base_qindex; - int y_dc_delta_q; - int uv_dc_delta_q; - int uv_ac_delta_q; - int16_t y_dequant[MAX_SEGMENTS][2]; - int16_t uv_dequant[MAX_SEGMENTS][2]; - - /* We allocate a MODE_INFO struct for each macroblock, together with - an extra row on top and column on the left to simplify prediction. */ - int mi_alloc_size; - MODE_INFO *mip; /* Base of allocated array */ - MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ - - // TODO(agrange): Move prev_mi into encoder structure. - // prev_mip and prev_mi will only be allocated in VP9 encoder. - MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ - MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ - - // Separate mi functions between encoder and decoder. - int (*alloc_mi)(struct VP9Common *cm, int mi_size); - void (*free_mi)(struct VP9Common *cm); - void (*setup_mi)(struct VP9Common *cm); - - // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible - // area will be NULL. - MODE_INFO **mi_grid_base; - MODE_INFO **mi_grid_visible; - MODE_INFO **prev_mi_grid_base; - MODE_INFO **prev_mi_grid_visible; - - // Whether to use previous frame's motion vectors for prediction. - int use_prev_frame_mvs; - - // Persistent mb segment id map used in prediction. - int seg_map_idx; - int prev_seg_map_idx; - - uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; - uint8_t *last_frame_seg_map; - uint8_t *current_frame_seg_map; - int seg_map_alloc_size; - - INTERP_FILTER interp_filter; - - loop_filter_info_n lf_info; - - int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ - - int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ - - struct loopfilter lf; - struct segmentation seg; - - // TODO(hkuang): Remove this as it is the same as frame_parallel_decode - // in pbi. - int frame_parallel_decode; // frame-based threading. - - // Context probabilities for reference frame prediction - MV_REFERENCE_FRAME comp_fixed_ref; - MV_REFERENCE_FRAME comp_var_ref[2]; - REFERENCE_MODE reference_mode; - - FRAME_CONTEXT *fc; /* this frame entropy */ - FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS - unsigned int frame_context_idx; /* Context to use/update */ - FRAME_COUNTS counts; - - unsigned int current_video_frame; - BITSTREAM_PROFILE profile; - - // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. - vpx_bit_depth_t bit_depth; - vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer - -#if CONFIG_VP9_POSTPROC - struct postproc_state postproc_state; -#endif - - int error_resilient_mode; - int frame_parallel_decoding_mode; - - int log2_tile_cols, log2_tile_rows; - int byte_alignment; - int skip_loop_filter; - - // Private data associated with the frame buffer callbacks. - void *cb_priv; - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - // Handles memory for the codec. - InternalFrameBufferList int_frame_buffers; - - // External BufferPool passed from outside. - BufferPool *buffer_pool; - - PARTITION_CONTEXT *above_seg_context; - ENTROPY_CONTEXT *above_context; - int above_context_alloc_cols; -} VP9_COMMON; - -// TODO(hkuang): Don't need to lock the whole pool after implementing atomic -// frame reference count. -void lock_buffer_pool(BufferPool *const pool); -void unlock_buffer_pool(BufferPool *const pool); - -static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { - if (index < 0 || index >= REF_FRAMES) - return NULL; - if (cm->ref_frame_map[index] < 0) - return NULL; - assert(cm->ref_frame_map[index] < FRAME_BUFFERS); - return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; -} - -static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { - return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; -} - -static INLINE int get_free_fb(VP9_COMMON *cm) { - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - int i; - - lock_buffer_pool(cm->buffer_pool); - for (i = 0; i < FRAME_BUFFERS; ++i) - if (frame_bufs[i].ref_count == 0) - break; - - if (i != FRAME_BUFFERS) { - frame_bufs[i].ref_count = 1; - } else { - // Reset i to be INVALID_IDX to indicate no free buffer found. - i = INVALID_IDX; - } - - unlock_buffer_pool(cm->buffer_pool); - return i; -} - -static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { - const int ref_index = *idx; - - if (ref_index >= 0 && bufs[ref_index].ref_count > 0) - bufs[ref_index].ref_count--; - - *idx = new_idx; - - bufs[new_idx].ref_count++; -} - -static INLINE int mi_cols_aligned_to_sb(int n_mis) { - return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); -} - -static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { - return cm->frame_type == KEY_FRAME || cm->intra_only; -} - -static INLINE void set_partition_probs(const VP9_COMMON *const cm, - MACROBLOCKD *const xd) { - xd->partition_probs = - frame_is_intra_only(cm) ? - &vp9_kf_partition_probs[0] : - (const vpx_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob; -} - -static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, - tran_low_t *dqcoeff) { - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - xd->plane[i].dqcoeff = dqcoeff; - xd->above_context[i] = cm->above_context + - i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); - - if (get_plane_type(i) == PLANE_TYPE_Y) { - memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); - } else { - memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); - } - xd->fc = cm->fc; - } - - xd->above_seg_context = cm->above_seg_context; - xd->mi_stride = cm->mi_stride; - xd->error_info = &cm->error; - - set_partition_probs(cm, xd); -} - -static INLINE const vpx_prob* get_partition_probs(const MACROBLOCKD *xd, - int ctx) { - return xd->partition_probs[ctx]; -} - -static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { - const int above_idx = mi_col * 2; - const int left_idx = (mi_row * 2) & 15; - int i; - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; - pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; - } -} - -static INLINE int calc_mi_size(int len) { - // len is in mi units. - return len + MI_BLOCK_SIZE; -} - -static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, - int mi_row, int bh, - int mi_col, int bw, - int mi_rows, int mi_cols) { - xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); - xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; - xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); - xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; - - // Are edges available for intra prediction? - xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL; - xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL; -} - -static INLINE void update_partition_context(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE subsize, - BLOCK_SIZE bsize) { - PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - - // num_4x4_blocks_wide_lookup[bsize] / 2 - const int bs = num_8x8_blocks_wide_lookup[bsize]; - - // update the partition context at the end notes. set partition bits - // of block sizes larger than the current one to be one, and partition - // bits of smaller block sizes to be zero. - memset(above_ctx, partition_context_lookup[subsize].above, bs); - memset(left_ctx, partition_context_lookup[subsize].left, bs); -} - -static INLINE int partition_plane_context(const MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - const int bsl = mi_width_log2_lookup[bsize]; - int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; - - assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); - assert(bsl >= 0); - - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_ppflags.h b/thirdparty/libvpx/vp9/common/vp9_ppflags.h deleted file mode 100644 index 12b989f43a..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_ppflags.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_PPFLAGS_H_ -#define VP9_COMMON_VP9_PPFLAGS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - VP9D_NOFILTERING = 0, - VP9D_DEBLOCK = 1 << 0, - VP9D_DEMACROBLOCK = 1 << 1, - VP9D_ADDNOISE = 1 << 2, - VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3, - VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4, - VP9D_DEBUG_TXT_DC_DIFF = 1 << 5, - VP9D_DEBUG_TXT_RATE_INFO = 1 << 6, - VP9D_DEBUG_DRAW_MV = 1 << 7, - VP9D_DEBUG_CLR_BLK_MODES = 1 << 8, - VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9, - VP9D_MFQE = 1 << 10 -}; - -typedef struct { - int post_proc_flag; - int deblocking_level; - int noise_level; -} vp9_ppflags_t; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.c b/thirdparty/libvpx/vp9/common/vp9_pred_common.c deleted file mode 100644 index 8f90e70e73..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_pred_common.c +++ /dev/null @@ -1,314 +0,0 @@ - -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_seg_common.h" - -// Returns a context number for the given MB prediction signal -int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - const MODE_INFO *const left_mi = xd->left_mi; - const int left_type = left_mi && is_inter_block(left_mi) ? - left_mi->interp_filter : SWITCHABLE_FILTERS; - const MODE_INFO *const above_mi = xd->above_mi; - const int above_type = above_mi && is_inter_block(above_mi) ? - above_mi->interp_filter : SWITCHABLE_FILTERS; - - if (left_type == above_type) - return left_type; - else if (left_type == SWITCHABLE_FILTERS) - return above_type; - else if (above_type == SWITCHABLE_FILTERS) - return left_type; - else - return SWITCHABLE_FILTERS; -} - -int vp9_get_reference_mode_context(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - int ctx; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - if (has_above && has_left) { // both edges available - if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) - // neither edge uses comp pred (0/1) - ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ - (left_mi->ref_frame[0] == cm->comp_fixed_ref); - else if (!has_second_ref(above_mi)) - // one of two edges uses comp pred (2/3) - ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(above_mi)); - else if (!has_second_ref(left_mi)) - // one of two edges uses comp pred (2/3) - ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(left_mi)); - else // both edges use comp pred (4) - ctx = 4; - } else if (has_above || has_left) { // one edge available - const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; - - if (!has_second_ref(edge_mi)) - // edge does not use comp pred (0/1) - ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; - else - // edge uses comp pred (3) - ctx = 3; - } else { // no edges available (1) - ctx = 1; - } - assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); - return ctx; -} - -// Returns a context number for the given MB prediction signal -int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - int pred_context; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int above_in_image = !!above_mi; - const int left_in_image = !!left_mi; - - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; - const int var_ref_idx = !fix_ref_idx; - - if (above_in_image && left_in_image) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - - if (above_intra && left_intra) { // intra/intra (2) - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter - const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; - - if (!has_second_ref(edge_mi)) // single pred (1/3) - pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); - else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] - != cm->comp_var_ref[1]); - } else { // inter/inter - const int l_sg = !has_second_ref(left_mi); - const int a_sg = !has_second_ref(above_mi); - const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] - : above_mi->ref_frame[var_ref_idx]; - const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0] - : left_mi->ref_frame[var_ref_idx]; - - if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { - pred_context = 0; - } else if (l_sg && a_sg) { // single/single - if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) || - (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) - pred_context = 4; - else if (vrfa == vrfl) - pred_context = 3; - else - pred_context = 1; - } else if (l_sg || a_sg) { // single/comp - const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; - const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; - if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) - pred_context = 1; - else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1]) - pred_context = 2; - else - pred_context = 4; - } else if (vrfa == vrfl) { // comp/comp - pred_context = 4; - } else { - pred_context = 2; - } - } - } else if (above_in_image || left_in_image) { // one edge available - const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi; - - if (!is_inter_block(edge_mi)) { - pred_context = 2; - } else { - if (has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] - != cm->comp_var_ref[1]); - else - pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); - } - } else { // no edges available (2) - pred_context = 2; - } - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - - return pred_context; -} - -int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { - int pred_context; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - - if (above_intra && left_intra) { // intra/intra - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; - if (!has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); - else - pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || - edge_mi->ref_frame[1] == LAST_FRAME); - } else { // inter/inter - const int above_has_second = has_second_ref(above_mi); - const int left_has_second = has_second_ref(left_mi); - const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; - - if (above_has_second && left_has_second) { - pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || - left0 == LAST_FRAME || left1 == LAST_FRAME); - } else if (above_has_second || left_has_second) { - const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; - - if (rfs == LAST_FRAME) - pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); - else - pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); - } else { - pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); - } - } - } else if (has_above || has_left) { // one edge available - const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; - if (!is_inter_block(edge_mi)) { // intra - pred_context = 2; - } else { // inter - if (!has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); - else - pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || - edge_mi->ref_frame[1] == LAST_FRAME); - } - } else { // no edges available - pred_context = 2; - } - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { - int pred_context; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - - if (above_intra && left_intra) { // intra/intra - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; - if (!has_second_ref(edge_mi)) { - if (edge_mi->ref_frame[0] == LAST_FRAME) - pred_context = 3; - else - pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); - } else { - pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || - edge_mi->ref_frame[1] == GOLDEN_FRAME); - } - } else { // inter/inter - const int above_has_second = has_second_ref(above_mi); - const int left_has_second = has_second_ref(left_mi); - const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; - - if (above_has_second && left_has_second) { - if (above0 == left0 && above1 == left1) - pred_context = 3 * (above0 == GOLDEN_FRAME || - above1 == GOLDEN_FRAME || - left0 == GOLDEN_FRAME || - left1 == GOLDEN_FRAME); - else - pred_context = 2; - } else if (above_has_second || left_has_second) { - const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; - - if (rfs == GOLDEN_FRAME) - pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); - else if (rfs == ALTREF_FRAME) - pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME; - else - pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); - } else { - if (above0 == LAST_FRAME && left0 == LAST_FRAME) { - pred_context = 3; - } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) { - const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0 - : above0; - pred_context = 4 * (edge0 == GOLDEN_FRAME); - } else { - pred_context = 2 * (above0 == GOLDEN_FRAME) + - 2 * (left0 == GOLDEN_FRAME); - } - } - } - } else if (has_above || has_left) { // one edge available - const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; - - if (!is_inter_block(edge_mi) || - (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) - pred_context = 2; - else if (!has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); - else - pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || - edge_mi->ref_frame[1] == GOLDEN_FRAME); - } else { // no edges available (2) - pred_context = 2; - } - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.h b/thirdparty/libvpx/vp9/common/vp9_pred_common.h deleted file mode 100644 index f3c676e953..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_pred_common.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_PRED_COMMON_H_ -#define VP9_COMMON_VP9_PRED_COMMON_H_ - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vpx_dsp/vpx_dsp_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE int get_segment_id(const VP9_COMMON *cm, - const uint8_t *segment_ids, - BLOCK_SIZE bsize, int mi_row, int mi_col) { - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[bsize]; - const int bh = num_8x8_blocks_high_lookup[bsize]; - const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); - const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); - int x, y, segment_id = MAX_SEGMENTS; - - for (y = 0; y < ymis; ++y) - for (x = 0; x < xmis; ++x) - segment_id = - VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - return segment_id; -} - -static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int above_sip = (above_mi != NULL) ? - above_mi->seg_id_predicted : 0; - const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; - - return above_sip + left_sip; -} - -static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, - const MACROBLOCKD *xd) { - return seg->pred_probs[vp9_get_pred_context_seg_id(xd)]; -} - -static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; - const int left_skip = (left_mi != NULL) ? left_mi->skip : 0; - return above_skip + left_skip; -} - -static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->skip_probs[vp9_get_skip_context(xd)]; -} - -int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); - -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real macroblocks. -// The prediction flags in these dummy entries are initialized to 0. -// 0 - inter/inter, inter/--, --/inter, --/-- -// 1 - intra/inter, inter/intra -// 2 - intra/--, --/intra -// 3 - intra/intra -static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - return left_intra && above_intra ? 3 : left_intra || above_intra; - } else if (has_above || has_left) { // one edge available - return 2 * !is_inter_block(has_above ? above_mi : left_mi); - } - return 0; -} - -static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; -} - -int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)]; -} - -int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, - const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd); - return cm->fc->comp_ref_prob[pred_context]; -} - -int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0]; -} - -int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; -} - -// Returns a context number for the given MB prediction signal -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real blocks. -// The prediction flags in these dummy entries are initialized to 0. -static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size - : max_tx_size; - int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size - : max_tx_size; - if (!has_left) - left_ctx = above_ctx; - - if (!has_above) - above_ctx = left_ctx; - - return (above_ctx + left_ctx) > max_tx_size; -} - -static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, - const struct tx_probs *tx_probs) { - switch (max_tx_size) { - case TX_8X8: - return tx_probs->p8x8[ctx]; - case TX_16X16: - return tx_probs->p16x16[ctx]; - case TX_32X32: - return tx_probs->p32x32[ctx]; - default: - assert(0 && "Invalid max_tx_size."); - return NULL; - } -} - -static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, - const MACROBLOCKD *xd, - const struct tx_probs *tx_probs) { - return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); -} - -static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, - struct tx_counts *tx_counts) { - switch (max_tx_size) { - case TX_8X8: - return tx_counts->p8x8[ctx]; - case TX_16X16: - return tx_counts->p16x16[ctx]; - case TX_32X32: - return tx_counts->p32x32[ctx]; - default: - assert(0 && "Invalid max_tx_size."); - return NULL; - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.c b/thirdparty/libvpx/vp9/common/vp9_quant_common.c deleted file mode 100644 index d83f3c1a2f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_quant_common.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_seg_common.h" - -static const int16_t dc_qlookup[QINDEX_RANGE] = { - 4, 8, 8, 9, 10, 11, 12, 12, - 13, 14, 15, 16, 17, 18, 19, 19, - 20, 21, 22, 23, 24, 25, 26, 26, - 27, 28, 29, 30, 31, 32, 32, 33, - 34, 35, 36, 37, 38, 38, 39, 40, - 41, 42, 43, 43, 44, 45, 46, 47, - 48, 48, 49, 50, 51, 52, 53, 53, - 54, 55, 56, 57, 57, 58, 59, 60, - 61, 62, 62, 63, 64, 65, 66, 66, - 67, 68, 69, 70, 70, 71, 72, 73, - 74, 74, 75, 76, 77, 78, 78, 79, - 80, 81, 81, 82, 83, 84, 85, 85, - 87, 88, 90, 92, 93, 95, 96, 98, - 99, 101, 102, 104, 105, 107, 108, 110, - 111, 113, 114, 116, 117, 118, 120, 121, - 123, 125, 127, 129, 131, 134, 136, 138, - 140, 142, 144, 146, 148, 150, 152, 154, - 156, 158, 161, 164, 166, 169, 172, 174, - 177, 180, 182, 185, 187, 190, 192, 195, - 199, 202, 205, 208, 211, 214, 217, 220, - 223, 226, 230, 233, 237, 240, 243, 247, - 250, 253, 257, 261, 265, 269, 272, 276, - 280, 284, 288, 292, 296, 300, 304, 309, - 313, 317, 322, 326, 330, 335, 340, 344, - 349, 354, 359, 364, 369, 374, 379, 384, - 389, 395, 400, 406, 411, 417, 423, 429, - 435, 441, 447, 454, 461, 467, 475, 482, - 489, 497, 505, 513, 522, 530, 539, 549, - 559, 569, 579, 590, 602, 614, 626, 640, - 654, 668, 684, 700, 717, 736, 755, 775, - 796, 819, 843, 869, 896, 925, 955, 988, - 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, -}; - -#if CONFIG_VP9_HIGHBITDEPTH -static const int16_t dc_qlookup_10[QINDEX_RANGE] = { - 4, 9, 10, 13, 15, 17, 20, 22, - 25, 28, 31, 34, 37, 40, 43, 47, - 50, 53, 57, 60, 64, 68, 71, 75, - 78, 82, 86, 90, 93, 97, 101, 105, - 109, 113, 116, 120, 124, 128, 132, 136, - 140, 143, 147, 151, 155, 159, 163, 166, - 170, 174, 178, 182, 185, 189, 193, 197, - 200, 204, 208, 212, 215, 219, 223, 226, - 230, 233, 237, 241, 244, 248, 251, 255, - 259, 262, 266, 269, 273, 276, 280, 283, - 287, 290, 293, 297, 300, 304, 307, 310, - 314, 317, 321, 324, 327, 331, 334, 337, - 343, 350, 356, 362, 369, 375, 381, 387, - 394, 400, 406, 412, 418, 424, 430, 436, - 442, 448, 454, 460, 466, 472, 478, 484, - 490, 499, 507, 516, 525, 533, 542, 550, - 559, 567, 576, 584, 592, 601, 609, 617, - 625, 634, 644, 655, 666, 676, 687, 698, - 708, 718, 729, 739, 749, 759, 770, 782, - 795, 807, 819, 831, 844, 856, 868, 880, - 891, 906, 920, 933, 947, 961, 975, 988, - 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, - 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, - 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, - 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, - 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, - 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, - 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, - 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, - 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, - 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, - 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, -}; - -static const int16_t dc_qlookup_12[QINDEX_RANGE] = { - 4, 12, 18, 25, 33, 41, 50, 60, - 70, 80, 91, 103, 115, 127, 140, 153, - 166, 180, 194, 208, 222, 237, 251, 266, - 281, 296, 312, 327, 343, 358, 374, 390, - 405, 421, 437, 453, 469, 484, 500, 516, - 532, 548, 564, 580, 596, 611, 627, 643, - 659, 674, 690, 706, 721, 737, 752, 768, - 783, 798, 814, 829, 844, 859, 874, 889, - 904, 919, 934, 949, 964, 978, 993, 1008, - 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, - 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, - 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, - 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, - 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, - 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, - 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, - 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, - 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, - 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, - 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, - 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, - 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, - 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, - 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, - 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, - 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, - 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, - 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, - 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, - 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, - 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, - 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, -}; -#endif - -static const int16_t ac_qlookup[QINDEX_RANGE] = { - 4, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 84, 85, 86, - 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, - 104, 106, 108, 110, 112, 114, 116, 118, - 120, 122, 124, 126, 128, 130, 132, 134, - 136, 138, 140, 142, 144, 146, 148, 150, - 152, 155, 158, 161, 164, 167, 170, 173, - 176, 179, 182, 185, 188, 191, 194, 197, - 200, 203, 207, 211, 215, 219, 223, 227, - 231, 235, 239, 243, 247, 251, 255, 260, - 265, 270, 275, 280, 285, 290, 295, 300, - 305, 311, 317, 323, 329, 335, 341, 347, - 353, 359, 366, 373, 380, 387, 394, 401, - 408, 416, 424, 432, 440, 448, 456, 465, - 474, 483, 492, 501, 510, 520, 530, 540, - 550, 560, 571, 582, 593, 604, 615, 627, - 639, 651, 663, 676, 689, 702, 715, 729, - 743, 757, 771, 786, 801, 816, 832, 848, - 864, 881, 898, 915, 933, 951, 969, 988, - 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, - 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, - 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, - 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, -}; - -#if CONFIG_VP9_HIGHBITDEPTH -static const int16_t ac_qlookup_10[QINDEX_RANGE] = { - 4, 9, 11, 13, 16, 18, 21, 24, - 27, 30, 33, 37, 40, 44, 48, 51, - 55, 59, 63, 67, 71, 75, 79, 83, - 88, 92, 96, 100, 105, 109, 114, 118, - 122, 127, 131, 136, 140, 145, 149, 154, - 158, 163, 168, 172, 177, 181, 186, 190, - 195, 199, 204, 208, 213, 217, 222, 226, - 231, 235, 240, 244, 249, 253, 258, 262, - 267, 271, 275, 280, 284, 289, 293, 297, - 302, 306, 311, 315, 319, 324, 328, 332, - 337, 341, 345, 349, 354, 358, 362, 367, - 371, 375, 379, 384, 388, 392, 396, 401, - 409, 417, 425, 433, 441, 449, 458, 466, - 474, 482, 490, 498, 506, 514, 523, 531, - 539, 547, 555, 563, 571, 579, 588, 596, - 604, 616, 628, 640, 652, 664, 676, 688, - 700, 713, 725, 737, 749, 761, 773, 785, - 797, 809, 825, 841, 857, 873, 889, 905, - 922, 938, 954, 970, 986, 1002, 1018, 1038, - 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, - 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, - 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, - 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, - 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, - 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, - 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, - 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, - 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, - 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, - 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, - 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, - 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, -}; - -static const int16_t ac_qlookup_12[QINDEX_RANGE] = { - 4, 13, 19, 27, 35, 44, 54, 64, - 75, 87, 99, 112, 126, 139, 154, 168, - 183, 199, 214, 230, 247, 263, 280, 297, - 314, 331, 349, 366, 384, 402, 420, 438, - 456, 475, 493, 511, 530, 548, 567, 586, - 604, 623, 642, 660, 679, 698, 716, 735, - 753, 772, 791, 809, 828, 846, 865, 884, - 902, 920, 939, 957, 976, 994, 1012, 1030, - 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, - 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, - 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, - 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, - 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, - 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, - 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, - 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, - 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, - 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, - 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, - 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, - 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, - 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, - 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, - 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, - 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, - 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, - 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, - 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, - 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, - 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, - 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, - 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, -}; -#endif - -int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { -#if CONFIG_VP9_HIGHBITDEPTH - switch (bit_depth) { - case VPX_BITS_8: - return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_10: - return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_12: - return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -#else - (void) bit_depth; - return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; -#endif -} - -int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { -#if CONFIG_VP9_HIGHBITDEPTH - switch (bit_depth) { - case VPX_BITS_8: - return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_10: - return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_12: - return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -#else - (void) bit_depth; - return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; -#endif -} - -int vp9_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex) { - if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { - const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); - const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? - data : base_qindex + data; - return clamp(seg_qindex, 0, MAXQ); - } else { - return base_qindex; - } -} - diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.h b/thirdparty/libvpx/vp9/common/vp9_quant_common.h deleted file mode 100644 index 4bae4a8967..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_quant_common.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_ -#define VP9_COMMON_VP9_QUANT_COMMON_H_ - -#include "vpx/vpx_codec.h" -#include "vp9/common/vp9_seg_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MINQ 0 -#define MAXQ 255 -#define QINDEX_RANGE (MAXQ - MINQ + 1) -#define QINDEX_BITS 8 - -int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); -int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); - -int vp9_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.c b/thirdparty/libvpx/vp9/common/vp9_reconinter.c deleted file mode 100644 index 84718e9703..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconinter.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_scale_rtcd.h" -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_reconintra.h" - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *src_mv, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y, int bd) { - const int is_q4 = precision == MV_PRECISION_Q4; - const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, - is_q4 ? src_mv->col : src_mv->col * 2 }; - MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); - const int subpel_x = mv.col & SUBPEL_MASK; - const int subpel_y = mv.row & SUBPEL_MASK; - - src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - - highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, - bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *src_mv, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y) { - const int is_q4 = precision == MV_PRECISION_Q4; - const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, - is_q4 ? src_mv->col : src_mv->col * 2 }; - MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); - const int subpel_x = mv.col & SUBPEL_MASK; - const int subpel_y = mv.row & SUBPEL_MASK; - - src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - - inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); -} - -static INLINE int round_mv_comp_q4(int value) { - return (value < 0 ? value - 2 : value + 2) / 4; -} - -static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { - MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + - mi->bmi[1].as_mv[idx].as_mv.row + - mi->bmi[2].as_mv[idx].as_mv.row + - mi->bmi[3].as_mv[idx].as_mv.row), - round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + - mi->bmi[1].as_mv[idx].as_mv.col + - mi->bmi[2].as_mv[idx].as_mv.col + - mi->bmi[3].as_mv[idx].as_mv.col) }; - return res; -} - -static INLINE int round_mv_comp_q2(int value) { - return (value < 0 ? value - 1 : value + 1) / 2; -} - -static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) { - MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row + - mi->bmi[block1].as_mv[idx].as_mv.row), - round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col + - mi->bmi[block1].as_mv[idx].as_mv.col) }; - return res; -} - -// TODO(jkoleszar): yet another mv clamping function :-( -MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, - int bw, int bh, int ss_x, int ss_y) { - // If the MV points so far into the UMV border that no visible pixels - // are used for reconstruction, the subpel part of the MV can be - // discarded and the MV limited to 16 pixels with equivalent results. - const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS; - const int spel_right = spel_left - SUBPEL_SHIFTS; - const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; - const int spel_bottom = spel_top - SUBPEL_SHIFTS; - MV clamped_mv = { - src_mv->row * (1 << (1 - ss_y)), - src_mv->col * (1 << (1 - ss_x)) - }; - assert(ss_x <= 1); - assert(ss_y <= 1); - - clamp_mv(&clamped_mv, - xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, - xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, - xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, - xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); - - return clamped_mv; -} - -MV average_split_mvs(const struct macroblockd_plane *pd, - const MODE_INFO *mi, int ref, int block) { - const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); - MV res = {0, 0}; - switch (ss_idx) { - case 0: - res = mi->bmi[block].as_mv[ref].as_mv; - break; - case 1: - res = mi_mv_pred_q2(mi, ref, block, block + 2); - break; - case 2: - res = mi_mv_pred_q2(mi, ref, block, block + 1); - break; - case 3: - res = mi_mv_pred_q4(mi, ref); - break; - default: - assert(ss_idx <= 3 && ss_idx >= 0); - } - return res; -} - -static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi[0]; - const int is_compound = has_second_ref(mi); - const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; - int ref; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - const MV mv = mi->sb_type < BLOCK_8X8 - ? average_split_mvs(pd, mi, ref, block) - : mi->mv[ref].as_mv; - - // TODO(jkoleszar): This clamping is done in the incorrect place for the - // scaling case. It needs to be done on the scaled MV, not the pre-scaling - // MV. Note however that it performs the subsampling aware scaling so - // that the result is always q4. - // mv_precision precision is MV_PRECISION_Q4. - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); - - uint8_t *pre; - MV32 scaled_mv; - int xs, ys, subpel_x, subpel_y; - const int is_scaled = vp9_is_scaled(sf); - - if (is_scaled) { - // Co-ordinate of containing block to pixel precision. - const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); -#if CONFIG_BETTER_HW_COMPATIBILITY - assert(xd->mi[0]->sb_type != BLOCK_4X8 && - xd->mi[0]->sb_type != BLOCK_8X4); - assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && - mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); -#endif - if (plane == 0) - pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; - else if (plane == 1) - pre_buf->buf = xd->block_refs[ref]->buf->u_buffer; - else - pre_buf->buf = xd->block_refs[ref]->buf->v_buffer; - - pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y, - pre_buf->stride, sf); - pre = pre_buf->buf; - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - pre = pre_buf->buf + (y * pre_buf->stride + x); - scaled_mv.row = mv_q4.row; - scaled_mv.col = mv_q4.col; - xs = ys = 16; - } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride - + (scaled_mv.col >> SUBPEL_BITS); - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, - xd->bd); - } else { - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH - } -} - -static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, - int mi_row, int mi_col, - int plane_from, int plane_to) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - for (plane = plane_from; plane <= plane_to; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - &xd->plane[plane]); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const int bw = 4 * num_4x4_w; - const int bh = 4 * num_4x4_h; - - if (xd->mi[0]->sb_type < BLOCK_8X8) { - int i = 0, x, y; - assert(bsize == BLOCK_8X8); - for (y = 0; y < num_4x4_h; ++y) - for (x = 0; x < num_4x4_w; ++x) - build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); - } else { - build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); - } - } -} - -void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); -} - -void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane); -} - -void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, - MAX_MB_PLANE - 1); -} - -void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, - MAX_MB_PLANE - 1); -} - -void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col) { - uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, - src->v_buffer}; - const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, - src->uv_stride}; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &planes[i]; - setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, - pd->subsampling_x, pd->subsampling_y); - } -} - -void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *sf) { - if (src != NULL) { - int i; - uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, - src->v_buffer}; - const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, - src->uv_stride}; - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, - sf, pd->subsampling_x, pd->subsampling_y); - } - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.h b/thirdparty/libvpx/vp9/common/vp9_reconinter.h deleted file mode 100644 index 07745e3aaa..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconinter.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_RECONINTER_H_ -#define VP9_COMMON_VP9_RECONINTER_H_ - -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE void inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys) { - sf->predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd) { - sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, - int ref, int block); - -MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, - int bw, int bh, int ss_x, int ss_y); - -void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - -void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane); - -void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - -void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - -void vp9_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *mv_q3, - const struct scale_factors *sf, - int w, int h, int do_avg, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *mv_q3, - const struct scale_factors *sf, - int w, int h, int do_avg, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y, int bd); -#endif - -static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, - const struct scale_factors *sf) { - const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; - const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset; - return y * stride + x; -} - -static INLINE void setup_pred_plane(struct buf_2d *dst, - uint8_t *src, int stride, - int mi_row, int mi_col, - const struct scale_factors *scale, - int subsampling_x, int subsampling_y) { - const int x = (MI_SIZE * mi_col) >> subsampling_x; - const int y = (MI_SIZE * mi_row) >> subsampling_y; - dst->buf = src + scaled_buffer_offset(x, y, stride, scale); - dst->stride = stride; -} - -void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col); - -void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *sf); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.c b/thirdparty/libvpx/vp9/common/vp9_reconintra.c deleted file mode 100644 index 445785835a..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconintra.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" - -#if CONFIG_VP9_HIGHBITDEPTH -#include "vpx_dsp/vpx_dsp_common.h" -#endif // CONFIG_VP9_HIGHBITDEPTH -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/vpx_once.h" - -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_onyxc_int.h" - -const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { - DCT_DCT, // DC - ADST_DCT, // V - DCT_ADST, // H - DCT_DCT, // D45 - ADST_ADST, // D135 - ADST_DCT, // D117 - DCT_ADST, // D153 - DCT_ADST, // D207 - ADST_DCT, // D63 - ADST_ADST, // TM -}; - -enum { - NEED_LEFT = 1 << 1, - NEED_ABOVE = 1 << 2, - NEED_ABOVERIGHT = 1 << 3, -}; - -static const uint8_t extend_modes[INTRA_MODES] = { - NEED_ABOVE | NEED_LEFT, // DC - NEED_ABOVE, // V - NEED_LEFT, // H - NEED_ABOVERIGHT, // D45 - NEED_LEFT | NEED_ABOVE, // D135 - NEED_LEFT | NEED_ABOVE, // D117 - NEED_LEFT | NEED_ABOVE, // D153 - NEED_LEFT, // D207 - NEED_ABOVERIGHT, // D63 - NEED_LEFT | NEED_ABOVE, // TM -}; - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[INTRA_MODES][TX_SIZES]; -static intra_pred_fn dc_pred[2][2][TX_SIZES]; - -#if CONFIG_VP9_HIGHBITDEPTH -typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, const uint16_t *left, - int bd); -static intra_high_pred_fn pred_high[INTRA_MODES][4]; -static intra_high_pred_fn dc_pred_high[2][2][4]; -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void vp9_init_intra_predictors_internal(void) { -#define INIT_ALL_SIZES(p, type) \ - p[TX_4X4] = vpx_##type##_predictor_4x4; \ - p[TX_8X8] = vpx_##type##_predictor_8x8; \ - p[TX_16X16] = vpx_##type##_predictor_16x16; \ - p[TX_32X32] = vpx_##type##_predictor_32x32 - - INIT_ALL_SIZES(pred[V_PRED], v); - INIT_ALL_SIZES(pred[H_PRED], h); - INIT_ALL_SIZES(pred[D207_PRED], d207); - INIT_ALL_SIZES(pred[D45_PRED], d45); - INIT_ALL_SIZES(pred[D63_PRED], d63); - INIT_ALL_SIZES(pred[D117_PRED], d117); - INIT_ALL_SIZES(pred[D135_PRED], d135); - INIT_ALL_SIZES(pred[D153_PRED], d153); - INIT_ALL_SIZES(pred[TM_PRED], tm); - - INIT_ALL_SIZES(dc_pred[0][0], dc_128); - INIT_ALL_SIZES(dc_pred[0][1], dc_top); - INIT_ALL_SIZES(dc_pred[1][0], dc_left); - INIT_ALL_SIZES(dc_pred[1][1], dc); - -#if CONFIG_VP9_HIGHBITDEPTH - INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); - INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); - INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207); - INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45); - INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); - INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117); - INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135); - INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153); - INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm); - - INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128); - INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top); - INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left); - INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc); -#endif // CONFIG_VP9_HIGHBITDEPTH - -#undef intra_pred_allsizes -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void build_intra_predictors_high(const MACROBLOCKD *xd, - const uint8_t *ref8, - int ref_stride, - uint8_t *dst8, - int dst_stride, - PREDICTION_MODE mode, - TX_SIZE tx_size, - int up_available, - int left_available, - int right_available, - int x, int y, - int plane, int bd) { - int i; - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - DECLARE_ALIGNED(16, uint16_t, left_col[32]); - DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); - uint16_t *above_row = above_data + 16; - const uint16_t *const_above_row = above_row; - const int bs = 4 << tx_size; - int frame_width, frame_height; - int x0, y0; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int need_left = extend_modes[mode] & NEED_LEFT; - const int need_above = extend_modes[mode] & NEED_ABOVE; - const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; - int base = 128 << (bd - 8); - // 127 127 127 .. 127 127 127 127 127 127 - // 129 A B .. Y Z - // 129 C D .. W X - // 129 E F .. U V - // 129 G H .. S T T T T T - // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. - - // Get current frame pointer, width and height. - if (plane == 0) { - frame_width = xd->cur_buf->y_width; - frame_height = xd->cur_buf->y_height; - } else { - frame_width = xd->cur_buf->uv_width; - frame_height = xd->cur_buf->uv_height; - } - - // Get block position in current frame. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // NEED_LEFT - if (need_left) { - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; - } - } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } - } else { - vpx_memset16(left_col, base + 1, bs); - } - } - - // NEED_ABOVE - if (need_above) { - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + bs <= frame_width) { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - } - } - above_row[-1] = left_available ? above_ref[-1] : (base + 1); - } else { - vpx_memset16(above_row, base - 1, bs); - above_row[-1] = base - 1; - } - } - - // NEED_ABOVERIGHT - if (need_aboveright) { - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); - } - above_row[-1] = left_available ? above_ref[-1] : (base + 1); - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); - else - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - above_row[-1] = left_available ? above_ref[-1] : (base + 1); - } - } - } else { - vpx_memset16(above_row, base - 1, bs * 2); - above_row[-1] = base - 1; - } - } - - // predict - if (mode == DC_PRED) { - dc_pred_high[left_available][up_available][tx_size](dst, dst_stride, - const_above_row, - left_col, xd->bd); - } else { - pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, - xd->bd); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, - int ref_stride, uint8_t *dst, int dst_stride, - PREDICTION_MODE mode, TX_SIZE tx_size, - int up_available, int left_available, - int right_available, int x, int y, - int plane) { - int i; - DECLARE_ALIGNED(16, uint8_t, left_col[32]); - DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); - uint8_t *above_row = above_data + 16; - const uint8_t *const_above_row = above_row; - const int bs = 4 << tx_size; - int frame_width, frame_height; - int x0, y0; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - - // 127 127 127 .. 127 127 127 127 127 127 - // 129 A B .. Y Z - // 129 C D .. W X - // 129 E F .. U V - // 129 G H .. S T T T T T - // .. - - // Get current frame pointer, width and height. - if (plane == 0) { - frame_width = xd->cur_buf->y_width; - frame_height = xd->cur_buf->y_height; - } else { - frame_width = xd->cur_buf->uv_width; - frame_height = xd->cur_buf->uv_height; - } - - // Get block position in current frame. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // NEED_LEFT - if (extend_modes[mode] & NEED_LEFT) { - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; - } - } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } - } else { - memset(left_col, 129, bs); - } - } - - // NEED_ABOVE - if (extend_modes[mode] & NEED_ABOVE) { - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + bs <= frame_width) { - memcpy(above_row, above_ref, bs); - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs); - } - } - above_row[-1] = left_available ? above_ref[-1] : 129; - } else { - memset(above_row, 127, bs); - above_row[-1] = 127; - } - } - - // NEED_ABOVERIGHT - if (extend_modes[mode] & NEED_ABOVERIGHT) { - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs); - } else { - memcpy(above_row, above_ref, bs); - memset(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); - } else { - memcpy(above_row, above_ref, bs); - memset(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs); - else - memset(above_row + bs, above_row[bs - 1], bs); - } - } - above_row[-1] = left_available ? above_ref[-1] : 129; - } else { - memset(above_row, 127, bs * 2); - above_row[-1] = 127; - } - } - - // predict - if (mode == DC_PRED) { - dc_pred[left_available][up_available][tx_size](dst, dst_stride, - const_above_row, left_col); - } else { - pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); - } -} - -void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, - TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int aoff, int loff, int plane) { - const int bw = (1 << bwl_in); - const int txw = (1 << tx_size); - const int have_top = loff || (xd->above_mi != NULL); - const int have_left = aoff || (xd->left_mi != NULL); - const int have_right = (aoff + txw) < bw; - const int x = aoff * 4; - const int y = loff * 4; - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, have_top, have_left, have_right, - x, y, plane, xd->bd); - return; - } -#endif - build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, - have_top, have_left, have_right, x, y, plane); -} - -void vp9_init_intra_predictors(void) { - once(vp9_init_intra_predictors_internal); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.h b/thirdparty/libvpx/vp9/common/vp9_reconintra.h deleted file mode 100644 index de453808b7..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconintra.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_RECONINTRA_H_ -#define VP9_COMMON_VP9_RECONINTRA_H_ - -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_init_intra_predictors(void); - -void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, - TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int aoff, int loff, int plane); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_rtcd.c b/thirdparty/libvpx/vp9/common/vp9_rtcd.c deleted file mode 100644 index 2dfa09f50e..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_rtcd.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vp9_rtcd.h" -#include "vpx_ports/vpx_once.h" - -void vp9_rtcd() { - // TODO(JBB): Remove this once, by insuring that both the encoder and - // decoder setup functions are protected by once(); - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.c b/thirdparty/libvpx/vp9/common/vp9_scale.c deleted file mode 100644 index b763b925b3..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scale.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_scale.h" -#include "vpx_dsp/vpx_filter.h" - -static INLINE int scaled_x(int val, const struct scale_factors *sf) { - return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); -} - -static INLINE int scaled_y(int val, const struct scale_factors *sf) { - return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); -} - -static int unscaled_value(int val, const struct scale_factors *sf) { - (void) sf; - return val; -} - -static int get_fixed_point_scale_factor(int other_size, int this_size) { - // Calculate scaling factor once for each reference frame - // and use fixed point scaling factors in decoding and encoding routines. - // Hardware implementations can calculate scale factor in device driver - // and use multiplication and shifting on hardware instead of division. - return (other_size << REF_SCALE_SHIFT) / this_size; -} - -MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { - const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; - const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; - const MV32 res = { - scaled_y(mv->row, sf) + y_off_q4, - scaled_x(mv->col, sf) + x_off_q4 - }; - return res; -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, - int use_highbd) { -#else -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h) { -#endif - if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { - sf->x_scale_fp = REF_INVALID_SCALE; - sf->y_scale_fp = REF_INVALID_SCALE; - return; - } - - sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); - sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); - sf->x_step_q4 = scaled_x(16, sf); - sf->y_step_q4 = scaled_y(16, sf); - - if (vp9_is_scaled(sf)) { - sf->scale_value_x = scaled_x; - sf->scale_value_y = scaled_y; - } else { - sf->scale_value_x = unscaled_value; - sf->scale_value_y = unscaled_value; - } - - // TODO(agrange): Investigate the best choice of functions to use here - // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what - // to do at full-pel offsets. The current selection, where the filter is - // applied in one direction only, and not at all for 0,0, seems to give the - // best quality, but it may be worth trying an additional mode that does - // do the filtering on full-pel. - - if (sf->x_step_q4 == 16) { - if (sf->y_step_q4 == 16) { - // No scaling in either direction. - sf->predict[0][0][0] = vpx_convolve_copy; - sf->predict[0][0][1] = vpx_convolve_avg; - sf->predict[0][1][0] = vpx_convolve8_vert; - sf->predict[0][1][1] = vpx_convolve8_avg_vert; - sf->predict[1][0][0] = vpx_convolve8_horiz; - sf->predict[1][0][1] = vpx_convolve8_avg_horiz; - } else { - // No scaling in x direction. Must always scale in the y direction. - sf->predict[0][0][0] = vpx_scaled_vert; - sf->predict[0][0][1] = vpx_scaled_avg_vert; - sf->predict[0][1][0] = vpx_scaled_vert; - sf->predict[0][1][1] = vpx_scaled_avg_vert; - sf->predict[1][0][0] = vpx_scaled_2d; - sf->predict[1][0][1] = vpx_scaled_avg_2d; - } - } else { - if (sf->y_step_q4 == 16) { - // No scaling in the y direction. Must always scale in the x direction. - sf->predict[0][0][0] = vpx_scaled_horiz; - sf->predict[0][0][1] = vpx_scaled_avg_horiz; - sf->predict[0][1][0] = vpx_scaled_2d; - sf->predict[0][1][1] = vpx_scaled_avg_2d; - sf->predict[1][0][0] = vpx_scaled_horiz; - sf->predict[1][0][1] = vpx_scaled_avg_horiz; - } else { - // Must always scale in both directions. - sf->predict[0][0][0] = vpx_scaled_2d; - sf->predict[0][0][1] = vpx_scaled_avg_2d; - sf->predict[0][1][0] = vpx_scaled_2d; - sf->predict[0][1][1] = vpx_scaled_avg_2d; - sf->predict[1][0][0] = vpx_scaled_2d; - sf->predict[1][0][1] = vpx_scaled_avg_2d; - } - } - - // 2D subpel motion always gets filtered in both directions - - if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) { - sf->predict[1][1][0] = vpx_scaled_2d; - sf->predict[1][1][1] = vpx_scaled_avg_2d; - } else { - sf->predict[1][1][0] = vpx_convolve8; - sf->predict[1][1][1] = vpx_convolve8_avg; - } - -#if CONFIG_VP9_HIGHBITDEPTH - if (use_highbd) { - if (sf->x_step_q4 == 16) { - if (sf->y_step_q4 == 16) { - // No scaling in either direction. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; - } else { - // No scaling in x direction. Must always scale in the y direction. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; - } - } else { - if (sf->y_step_q4 == 16) { - // No scaling in the y direction. Must always scale in the x direction. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; - } else { - // Must always scale in both directions. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve8; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; - } - } - // 2D subpel motion always gets filtered in both directions. - sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; - sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; - } -#endif -} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.h b/thirdparty/libvpx/vp9/common/vp9_scale.h deleted file mode 100644 index 5e91041079..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scale.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_SCALE_H_ -#define VP9_COMMON_VP9_SCALE_H_ - -#include "vp9/common/vp9_mv.h" -#include "vpx_dsp/vpx_convolve.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define REF_SCALE_SHIFT 14 -#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) -#define REF_INVALID_SCALE -1 - -struct scale_factors { - int x_scale_fp; // horizontal fixed point scale factor - int y_scale_fp; // vertical fixed point scale factor - int x_step_q4; - int y_step_q4; - - int (*scale_value_x)(int val, const struct scale_factors *sf); - int (*scale_value_y)(int val, const struct scale_factors *sf); - - convolve_fn_t predict[2][2][2]; // horiz, vert, avg -#if CONFIG_VP9_HIGHBITDEPTH - highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg -#endif -}; - -MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, - int use_high); -#else -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h); -#endif - -static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { - return sf->x_scale_fp != REF_INVALID_SCALE && - sf->y_scale_fp != REF_INVALID_SCALE; -} - -static INLINE int vp9_is_scaled(const struct scale_factors *sf) { - return vp9_is_valid_scale(sf) && - (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); -} - -static INLINE int valid_ref_frame_size(int ref_width, int ref_height, - int this_width, int this_height) { - return 2 * this_width >= ref_width && - 2 * this_height >= ref_height && - this_width <= 16 * ref_width && - this_height <= 16 * ref_height; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_SCALE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.c b/thirdparty/libvpx/vp9/common/vp9_scan.c deleted file mode 100644 index 8b8b09f4a3..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scan.c +++ /dev/null @@ -1,725 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_scan.h" - -DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { - 0, 4, 1, 5, - 8, 2, 12, 9, - 3, 6, 13, 10, - 7, 14, 11, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { - 0, 4, 8, 1, - 12, 5, 9, 2, - 13, 6, 10, 3, - 7, 14, 11, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { - 0, 1, 4, 2, - 5, 3, 6, 8, - 9, 7, 12, 10, - 13, 11, 14, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { - 0, 8, 1, 16, 9, 2, 17, 24, - 10, 3, 18, 25, 32, 11, 4, 26, - 33, 19, 40, 12, 34, 27, 5, 41, - 20, 48, 13, 35, 42, 28, 21, 6, - 49, 56, 36, 43, 29, 7, 14, 50, - 57, 44, 22, 37, 15, 51, 58, 30, - 45, 23, 52, 59, 38, 31, 60, 53, - 46, 39, 61, 54, 47, 62, 55, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { - 0, 8, 16, 1, 24, 9, 32, 17, - 2, 40, 25, 10, 33, 18, 48, 3, - 26, 41, 11, 56, 19, 34, 4, 49, - 27, 42, 12, 35, 20, 57, 50, 28, - 5, 43, 13, 36, 58, 51, 21, 44, - 6, 29, 59, 37, 14, 52, 22, 7, - 45, 60, 30, 15, 38, 53, 23, 46, - 31, 61, 39, 54, 47, 62, 55, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { - 0, 1, 2, 8, 9, 3, 16, 10, - 4, 17, 11, 24, 5, 18, 25, 12, - 19, 26, 32, 6, 13, 20, 33, 27, - 7, 34, 40, 21, 28, 41, 14, 35, - 48, 42, 29, 36, 49, 22, 43, 15, - 56, 37, 50, 44, 30, 57, 23, 51, - 58, 45, 38, 52, 31, 59, 53, 46, - 60, 39, 61, 47, 54, 55, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { - 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, - 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, - 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, - 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, - 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, - 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, - 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, - 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, - 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, - 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, - 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, - 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, - 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, - 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, - 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, - 251, - 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { - 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, - 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, - 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, - 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, - 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, - 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, - 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, - 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, - 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, - 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, - 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, - 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, - 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, - 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, - 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, - 236, - 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { - 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, - 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, - 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, - 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, - 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, - 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, - 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, - 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, - 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, - 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, - 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, - 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, - 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, - 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, - 158, - 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, - 175, - 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { - 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, - 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, - 68, 131, 37, 100, - 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, - 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, - 102, 352, 8, 197, - 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, - 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, - 41, 417, 199, 136, - 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, - 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, - 295, 420, 106, 451, - 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, - 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, - 453, 139, 44, 234, - 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, - 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, - 486, 77, 204, 362, - 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, - 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, - 111, 238, 48, 143, - 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, - 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, - 393, 300, 269, 176, 145, - 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, - 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, - 550, 519, 488, 457, 426, 395, - 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, - 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, - 210, 179, 117, 86, 55, 738, 707, - 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, - 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, - 645, 552, 521, 428, 397, 304, - 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, - 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, - 864, 833, 802, 771, 740, 709, - 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, - 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, - 710, 679, 617, 586, 555, 493, - 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, - 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, - 743, 619, 495, 371, 247, 123, - 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, - 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, - 898, 836, 805, 774, 712, 681, - 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, - 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, - 651, 620, 589, 558, 527, - 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, - 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, - 559, 497, 466, 435, 373, - 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, - 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, - 499, 375, 251, 127, - 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, - 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, - 685, 654, 592, 561, - 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, - 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, - 438, 407, 376, 345, - 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, - 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, - 967, 874, 843, 750, - 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, - 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, - 564, 533, 440, 409, - 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, - 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, - 752, 721, 690, 659, - 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, - 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, - 350, 319, 1002, 971, - 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, - 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, - 537, 444, 413, 972, - 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, - 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, - 570, 539, 508, 477, - 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, - 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, - 1007, 883, 759, 635, 511, - 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, - 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, - 884, 853, 822, 791, - 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, - 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, - 1011, 887, 763, 639, - 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, - 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, - 702, 671, 1013, 982, - 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, - 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, - 1016, 985, 954, 923, - 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, - 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, - 990, 959, 1022, 991, 1023, -}; - -// Neighborhood 2-tuples for various scans and blocksizes, -// in {top, left} order for each position in corresponding scan order. -DECLARE_ALIGNED(16, static const int16_t, - default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, - 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, 9, 2, 2, 6, 6, 2, 2, 3, - 3, 10, 10, 7, 7, 11, 11, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, 8, 6, 6, 8, 8, 9, 9, 12, - 12, 10, 10, 13, 13, 14, 14, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, 32, 17, 17, 2, - 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, 48, 48, 11, 11, 26, - 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, 12, 49, 49, 42, 42, 20, - 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, 13, 13, 36, 36, 5, 5, 21, 21, - 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, - 30, 45, 45, 15, 15, 38, 38, 23, 23, 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, - 47, 47, 55, 55, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, 16, 10, 10, 16, 16, - 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, 5, 5, 12, 12, 19, 19, - 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, 27, 40, 40, 13, 13, 34, 34, - 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, 42, 42, 14, 14, 48, 48, 36, - 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, 50, 57, 57, 44, 44, 37, 37, - 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, 38, 38, 60, 60, 46, 46, 53, - 53, 54, 54, 61, 61, 62, 62, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, 2, 10, 17, 17, - 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, 4, 11, 26, 33, 19, - 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, 41, 20, 27, 13, 20, 5, - 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, - 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, 37, 44, 15, 22, 44, 51, - 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, 39, - 46, 54, 61, 47, 54, 55, 62, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, 64, - 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, 65, 65, - 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, 128, 3, 3, - 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, 113, 113, 3, 3, - 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, 67, 20, 20, 83, 83, - 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, 52, 52, 99, 99, 5, 5, - 130, 130, 68, 68, 192, 192, 161, 161, 21, 21, 115, 115, 84, 84, 37, 37, - 146, 146, 208, 208, 53, 53, 5, 5, 100, 100, 177, 177, 131, 131, 69, 69, - 6, 6, 224, 224, 116, 116, 22, 22, 162, 162, 85, 85, 147, 147, 38, 38, - 193, 193, 101, 101, 54, 54, 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, - 209, 209, 7, 7, 117, 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, - 225, 225, 39, 39, 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, - 71, 71, 210, 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, - 56, 56, 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, - 72, 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, - 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, 151, - 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, 10, 10, - 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, 121, 213, 213, - 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, 10, 10, 90, 90, - 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, 27, 199, 199, 43, 43, - 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, 11, 11, 75, 75, 138, 138, - 200, 200, 215, 215, 91, 91, 12, 12, 28, 28, 185, 185, 107, 107, 154, 154, - 44, 44, 231, 231, 216, 216, 60, 60, 123, 123, 12, 12, 76, 76, 201, 201, - 170, 170, 232, 232, 139, 139, 92, 92, 13, 13, 108, 108, 29, 29, 186, 186, - 217, 217, 155, 155, 45, 45, 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, - 77, 77, 14, 14, 171, 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, - 46, 46, 156, 156, 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, - 31, 31, 172, 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, - 110, 110, 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, - 142, 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, - 220, 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, - 175, 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, - 223, 239, 239, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, 17, - 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, 19, 19, - 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, 7, 35, 35, - 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, 65, 65, 51, 51, - 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, 52, 23, 23, 81, 81, - 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, 82, 82, 96, 96, 68, 68, - 24, 24, 97, 97, 83, 83, 39, 39, 96, 96, 54, 54, 11, 11, 69, 69, - 98, 98, 112, 112, 84, 84, 25, 25, 40, 40, 55, 55, 113, 113, 99, 99, - 12, 12, 70, 70, 112, 112, 85, 85, 26, 26, 114, 114, 100, 100, 128, 128, - 41, 41, 56, 56, 71, 71, 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, - 128, 128, 72, 72, 130, 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, - 42, 42, 144, 144, 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, - 88, 88, 132, 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, - 160, 160, 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, - 74, 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, - 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, 135, - 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, 60, 60, - 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, 45, 165, 165, - 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, 122, 122, 152, 152, - 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, 181, 224, 224, 107, 107, - 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, 168, 168, 210, 210, 46, 46, - 138, 138, 92, 92, 183, 183, 225, 225, 211, 211, 240, 240, 197, 197, 169, 169, - 123, 123, 154, 154, 198, 198, 77, 77, 212, 212, 184, 184, 108, 108, 226, 226, - 199, 199, 62, 62, 227, 227, 241, 241, 139, 139, 213, 213, 170, 170, 185, 185, - 155, 155, 228, 228, 242, 242, 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, - 215, 215, 229, 229, 140, 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, - 156, 156, 244, 244, 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, - 202, 202, 246, 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, - 157, 187, 187, 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, - 203, 203, 142, 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, - 219, 174, 174, 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, - 206, 206, 236, 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, - 238, 253, 253, 254, 254, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, 32, - 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, 64, 64, - 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, 80, 35, 50, - 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, 5, 20, 36, 51, - 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, 67, 112, 112, 37, 52, - 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, 113, 128, 22, 37, 53, 68, - 84, 99, 99, 114, 128, 128, 114, 129, 69, 84, 38, 53, 7, 22, 7, 7, - 129, 144, 23, 38, 54, 69, 100, 115, 85, 100, 115, 130, 144, 144, 130, 145, - 39, 54, 70, 85, 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, - 8, 8, 86, 101, 131, 146, 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, - 117, 132, 102, 117, 161, 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, - 9, 9, 176, 176, 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, - 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, - 10, 10, 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, - 164, 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, - 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, 58, - 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, 209, 224, - 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 12, 12, - 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, 122, 137, 91, 106, - 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, 183, 211, 226, 153, 168, - 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, 76, 91, 13, 13, 183, 198, - 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, 227, 242, 92, 107, - 61, 76, 139, 154, 14, 29, 14, 14, 184, 199, 213, 228, 108, 123, 199, 214, - 228, 243, 77, 92, 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, - 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, - 215, 230, 31, 46, 171, 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, - 47, 62, 216, 231, 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, - 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, - 203, 218, 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, - 234, 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, - 250, 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, - 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, - 238, 239, 254, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, 33, 64, - 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, 97, 128, 3, 34, - 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, 160, 160, 4, 35, 67, 98, - 192, 192, 4, 4, 130, 161, 161, 192, 36, 67, 99, 130, 5, 36, 68, 99, - 193, 224, 162, 193, 224, 224, 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, - 225, 256, 256, 256, 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, - 195, 226, 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, - 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, 165, 196, - 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, 352, 352, 197, 228, - 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, 103, 134, 353, 384, - 166, 197, 229, 260, 40, 71, 8, 8, 384, 384, 135, 166, 354, 385, 323, 354, - 198, 229, 292, 323, 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, - 230, 261, 355, 386, 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, - 199, 230, 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, - 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, 294, 325, - 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, 74, 105, 419, 450, - 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, 11, 42, 106, 137, 480, 480, - 450, 481, 358, 389, 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, - 327, 358, 11, 11, 481, 512, 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, - 482, 513, 512, 512, 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, - 452, 483, 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, - 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, 453, 484, - 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, 140, 171, 515, - 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, - 361, 576, 576, 13, 13, 267, 298, 546, 577, 77, 108, 204, 235, 455, 486, 577, - 608, 299, 330, 109, 140, 547, 578, 14, 45, 14, 14, 141, 172, 578, 609, 331, - 362, 46, 77, 173, 204, 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, - 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, - 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, 49, 80, - 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, 608, 608, - 484, 515, 360, 391, 236, 267, 112, 143, 19, 19, 640, 640, 609, 640, 516, 547, - 485, 516, 392, 423, 361, 392, 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, - 20, 20, 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, - 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, - 145, 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, 580, - 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, - 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, 146, 177, 115, - 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, 674, 705, 643, 674, 581, 612, - 550, 581, 519, 550, 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, - 302, 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, 706, 737, 675, - 706, 582, 613, 551, 582, 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, - 179, 210, 86, 117, 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, - 87, 118, 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, - 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, 365, - 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, 800, 800, 769, - 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, - 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, 180, 211, 149, - 180, 118, 149, 56, 87, 25, 56, 25, 25, 832, 832, 801, 832, 770, 801, 739, - 770, 708, 739, 677, 708, 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, - 491, 522, 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, 274, - 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, - 57, 26, 26, 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, - 616, 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, - 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, 834, 865, - 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, 431, 462, 338, - 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, - 618, 463, 494, 339, 370, 215, 246, 91, 122, 864, 864, 740, 771, 616, 647, - 492, 523, 368, 399, 244, 275, 120, 151, 27, 27, 896, 896, 865, 896, 772, 803, - 741, 772, 648, 679, 617, 648, 524, 555, 493, 524, 400, 431, 369, 400, 276, - 307, 245, 276, 152, 183, 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, - 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, - 525, 556, 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, - 277, 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, 929, - 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, 712, 743, - 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, 495, 526, 464, - 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, 278, 309, 247, 278, - 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 30, 30, - 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, 713, 744, 682, - 713, 651, 682, 589, 620, 558, 589, 527, 558, 465, 496, 434, 465, 403, 434, - 341, 372, 310, 341, 279, 310, 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, - 31, 62, 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, - 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, - 125, 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, 219, - 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, - 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, 621, 652, 528, - 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, 156, 187, 125, 156, - 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, 746, 777, 684, 715, 653, - 684, 622, 653, 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, - 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, 964, 995, 933, - 964, 902, 933, 871, 902, 840, 871, 809, 840, 778, 809, 747, 778, 716, 747, - 685, 716, 654, 685, 623, 654, 592, 623, 561, 592, 530, 561, 499, 530, 468, - 499, 437, 468, 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, - 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, - 872, 810, 841, 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, - 531, 562, 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, - 252, 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, - 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, 222, - 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, 347, 378, - 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, 252, 283, 904, - 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, - 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, 874, 905, 812, - 843, 781, 812, 750, 781, 688, 719, 657, 688, 626, 657, 564, 595, 533, 564, - 502, 533, 440, 471, 409, 440, 378, 409, 316, 347, 285, 316, 254, 285, 968, - 999, 937, 968, 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, - 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, 534, 565, 503, - 534, 472, 503, 441, 472, 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, - 255, 286, 969, 1000, 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, - 752, 690, 721, 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, - 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, - 846, 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, - 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, 876, - 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, - 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, 381, 412, 940, - 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, 692, 723, 661, 692, - 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, 413, 444, 382, 413, 972, - 1003, 941, 972, 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, - 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, 538, 569, 507, - 538, 476, 507, 445, 476, 414, 445, 383, 414, 973, 1004, 942, 973, 911, 942, - 849, 880, 818, 849, 787, 818, 725, 756, 694, 725, 663, 694, 601, 632, 570, - 601, 539, 570, 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, - 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, - 1006, 851, 882, 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, - 508, 539, 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, - 571, 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, - 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, 945, - 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, 728, 759, - 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, 511, 542, 977, - 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, - 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, 854, 885, 823, - 854, 730, 761, 699, 730, 606, 637, 575, 606, 979, 1010, 855, 886, 731, 762, - 607, 638, 884, 915, 760, 791, 636, 667, 916, 947, 885, 916, 792, 823, 761, - 792, 668, 699, 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, - 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, 918, 949, 887, - 918, 856, 887, 825, 856, 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, - 639, 670, 981, 1012, 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, - 764, 702, 733, 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, - 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, - 920, 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, - 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, 798, - 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, - 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, 892, 923, 924, - 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, 957, 988, 926, 957, - 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_4x4[16]) = { - 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_4x4[16]) = { - 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_4x4[16]) = { - 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_8x8[64]) = { - 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, - 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, - 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, - 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_8x8[64]) = { - 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, - 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, - 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, - 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_8x8[64]) = { - 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, - 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, - 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, - 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_16x16[256]) = { - 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, - 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, - 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, - 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, - 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, - 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, - 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, - 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, - 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, - 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, - 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, - 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, - 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, - 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, - 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, - 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_16x16[256]) = { - 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, 86, - 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, 115, 130, - 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, 119, 142, 167, - 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, 116, 135, 161, 185, - 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, 112, 133, 154, 179, 205, - 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, 106, 121, 146, 169, 196, 212, - 41, 46, 49, 56, 63, 70, 79, 90, 98, 107, 122, 138, 159, 182, 207, 222, - 52, 57, 62, 69, 75, 83, 93, 102, 110, 120, 134, 150, 176, 195, 215, 226, - 66, 71, 78, 82, 91, 97, 108, 113, 127, 136, 148, 168, 188, 202, 221, 232, - 80, 89, 92, 101, 105, 114, 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, - 95, 104, 109, 117, 123, 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, - 111, 118, 124, 129, 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, - 243, 126, 132, 137, 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, - 244, 246, 141, 149, 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, - 242, 249, 251, 152, 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, - 245, 247, 252, 253, 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, - 241, 248, 250, 254, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_16x16[256]) = { - 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, 179, - 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, 178, 196, - 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, 164, 186, 201, - 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, 153, 169, 193, 208, - 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, 133, 161, 176, 198, 214, - 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, 120, 140, 165, 185, 205, 221, - 22, 27, 32, 41, 48, 60, 73, 85, 99, 116, 130, 151, 175, 190, 211, 225, - 29, 35, 42, 49, 59, 69, 81, 95, 108, 125, 139, 155, 182, 197, 217, 229, - 38, 45, 51, 61, 68, 80, 93, 105, 118, 134, 150, 168, 191, 207, 223, 234, - 50, 56, 63, 74, 83, 94, 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, - 62, 70, 76, 87, 97, 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, - 75, 82, 90, 102, 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, - 89, 100, 111, 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, - 103, 115, 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, - 252, 121, 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, - 251, 254, 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, - 249, 253, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_32x32[1024]) = { - 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, 170, 193, 204, - 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, 377, 405, 455, 471, 495, - 527, 1, 4, 8, 15, 22, 30, 45, 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, - 228, 234, 237, 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, - 3, 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, 208, 217, 224, - 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, 440, 493, 525, 550, 567, - 6, 11, 16, 23, 31, 43, 60, 73, 90, 109, 126, 150, 173, 196, 211, 220, 226, - 232, 236, 239, 296, 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, - 9, 14, 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, 223, 244, - 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, 582, 596, 617, 645, - 13, 20, 26, 35, 44, 54, 72, 85, 105, 123, 140, 163, 182, 205, 216, 225, - 254, 271, 294, 314, 353, 373, 400, 423, 468, 491, 522, 548, 595, 616, 644, - 666, 21, 27, 33, 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, - 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, 643, 665, - 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, 159, 178, 197, 212, 221, 230, - 292, 312, 326, 334, 398, 421, 437, 446, 520, 546, 564, 574, 642, 664, 679, - 687, 34, 40, 46, 56, 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, - 340, 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, 723, - 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, 194, 252, 268, 290, - 311, 351, 370, 396, 420, 466, 488, 518, 545, 593, 613, 640, 663, 704, 722, - 746, 765, 51, 59, 66, 76, 89, 99, 119, 131, 149, 168, 181, 200, 267, 289, - 310, 325, 369, 395, 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, - 745, 764, 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, 288, - 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, 661, 677, 686, - 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, 137, 155, 242, 251, 266, 287, - 339, 350, 368, 393, 452, 465, 486, 515, 580, 592, 611, 637, 692, 703, 720, - 743, 788, 798, 813, 833, 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, - 308, 349, 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, - 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, 185, 264, - 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, 609, 635, 659, 676, - 718, 741, 761, 775, 811, 831, 847, 858, 117, 128, 136, 148, 160, 175, 188, - 198, 284, 306, 322, 332, 390, 415, 433, 444, 512, 540, 560, 572, 634, 658, - 675, 685, 740, 760, 774, 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, - 249, 263, 283, 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, - 691, 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, 174, - 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, 539, 589, 607, - 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, 874, 886, 902, 915, 176, - 187, 195, 202, 261, 281, 304, 321, 363, 387, 413, 432, 481, 509, 538, 559, - 606, 631, 656, 674, 715, 737, 758, 773, 808, 827, 844, 856, 885, 901, 914, - 923, 192, 199, 206, 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, - 558, 571, 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, - 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, 480, 507, - 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, 825, 866, 873, 884, - 899, 930, 936, 945, 957, 246, 259, 278, 302, 345, 361, 384, 411, 460, 479, - 506, 536, 587, 604, 628, 654, 698, 713, 734, 756, 793, 806, 824, 842, 872, - 883, 898, 912, 935, 944, 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, - 478, 505, 535, 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, - 854, 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, 409, - 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, 770, 780, 822, - 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, 975, 336, 344, 359, 381, - 449, 459, 477, 503, 577, 586, 602, 625, 689, 697, 711, 731, 785, 792, 804, - 821, 865, 871, 881, 895, 929, 934, 942, 953, 977, 981, 987, 995, 343, 358, - 380, 408, 458, 476, 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, - 803, 820, 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, - 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, 729, 752, - 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, 962, 970, 985, 993, - 1000, 1005, 378, 406, 427, 441, 500, 531, 554, 569, 622, 649, 669, 682, 728, - 751, 768, 779, 818, 837, 851, 860, 892, 907, 918, 925, 950, 961, 969, 974, - 992, 999, 1004, 1007, 448, 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, - 727, 784, 790, 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, - 984, 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, - 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, 938, 948, - 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, 529, 553, 597, - 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, 850, 876, 889, 905, 917, - 937, 947, 959, 968, 982, 989, 997, 1003, 1011, 1015, 1019, 1022, 496, 528, - 552, 568, 618, 646, 667, 681, 724, 748, 766, 778, 814, 834, 849, 859, 888, - 904, 916, 924, 946, 958, 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, - 1023, -}; - -const scan_order vp9_default_scan_orders[TX_SIZES] = { - {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, -}; - -const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES] = { - { // TX_4X4 - {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, - {row_scan_4x4, vp9_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp9_col_iscan_4x4, col_scan_4x4_neighbors}, - {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors} - }, { // TX_8X8 - {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, - {row_scan_8x8, vp9_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp9_col_iscan_8x8, col_scan_8x8_neighbors}, - {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors} - }, { // TX_16X16 - {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, - {row_scan_16x16, vp9_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp9_col_iscan_16x16, col_scan_16x16_neighbors}, - {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors} - }, { // TX_32X32 - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - } -}; diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.h b/thirdparty/libvpx/vp9/common/vp9_scan.h deleted file mode 100644 index 4c1ee8107c..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scan.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_SCAN_H_ -#define VP9_COMMON_VP9_SCAN_H_ - -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_enums.h" -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_NEIGHBORS 2 - -typedef struct { - const int16_t *scan; - const int16_t *iscan; - const int16_t *neighbors; -} scan_order; - -extern const scan_order vp9_default_scan_orders[TX_SIZES]; -extern const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES]; - -static INLINE int get_coef_context(const int16_t *neighbors, - const uint8_t *token_cache, int c) { - return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + - token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; -} - -static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, - PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi[0]; - - if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { - return &vp9_default_scan_orders[tx_size]; - } else { - const PREDICTION_MODE mode = get_y_mode(mi, block_idx); - return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_SCAN_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.c b/thirdparty/libvpx/vp9/common/vp9_seg_common.c deleted file mode 100644 index 7af61629a0..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_seg_common.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_quant_common.h" - -static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; - -static const int seg_feature_data_max[SEG_LVL_MAX] = { - MAXQ, MAX_LOOP_FILTER, 3, 0 }; - -// These functions provide access to new segment level features. -// Eventually these function may be "optimized out" but for the moment, -// the coding mechanism is still subject to change so these provide a -// convenient single point of change. - -void vp9_clearall_segfeatures(struct segmentation *seg) { - vp9_zero(seg->feature_data); - vp9_zero(seg->feature_mask); - seg->aq_av_offset = 0; -} - -void vp9_enable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - seg->feature_mask[segment_id] |= 1 << feature_id; -} - -int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) { - return seg_feature_data_max[feature_id]; -} - -int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) { - return seg_feature_data_signed[feature_id]; -} - -void vp9_set_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id, int seg_data) { - assert(seg_data <= seg_feature_data_max[feature_id]); - if (seg_data < 0) { - assert(seg_feature_data_signed[feature_id]); - assert(-seg_data <= seg_feature_data_max[feature_id]); - } - - seg->feature_data[segment_id][feature_id] = seg_data; -} - -const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = { - 2, 4, 6, 8, 10, 12, - 0, -1, -2, -3, -4, -5, -6, -7 -}; - - -// TBD? Functions to read and write segment data with range / validity checking diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.h b/thirdparty/libvpx/vp9/common/vp9_seg_common.h deleted file mode 100644 index 99a9440c17..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_seg_common.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_SEG_COMMON_H_ -#define VP9_COMMON_VP9_SEG_COMMON_H_ - -#include "vpx_dsp/prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 - -#define MAX_SEGMENTS 8 -#define SEG_TREE_PROBS (MAX_SEGMENTS-1) - -#define PREDICTION_PROBS 3 - -// Segment level features. -typedef enum { - SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... - SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... - SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame - SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode - SEG_LVL_MAX = 4 // Number of features supported -} SEG_LVL_FEATURES; - - -struct segmentation { - uint8_t enabled; - uint8_t update_map; - uint8_t update_data; - uint8_t abs_delta; - uint8_t temporal_update; - - vpx_prob tree_probs[SEG_TREE_PROBS]; - vpx_prob pred_probs[PREDICTION_PROBS]; - - int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; - uint32_t feature_mask[MAX_SEGMENTS]; - int aq_av_offset; -}; - -static INLINE int segfeature_active(const struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id) { - return seg->enabled && - (seg->feature_mask[segment_id] & (1 << feature_id)); -} - -void vp9_clearall_segfeatures(struct segmentation *seg); - -void vp9_enable_segfeature(struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id); - -int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id); - -int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id); - -void vp9_set_segdata(struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id, - int seg_data); - -static INLINE int get_segdata(const struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - return seg->feature_data[segment_id][feature_id]; -} - -extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_SEG_COMMON_H_ - diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.c b/thirdparty/libvpx/vp9/common/vp9_thread_common.c deleted file mode 100644 index db78d6be89..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_thread_common.c +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_thread_common.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_loopfilter.h" - -#if CONFIG_MULTITHREAD -static INLINE void mutex_lock(pthread_mutex_t *const mutex) { - const int kMaxTryLocks = 4000; - int locked = 0; - int i; - - for (i = 0; i < kMaxTryLocks; ++i) { - if (!pthread_mutex_trylock(mutex)) { - locked = 1; - break; - } - } - - if (!locked) - pthread_mutex_lock(mutex); -} -#endif // CONFIG_MULTITHREAD - -static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - - if (r && !(c & (nsync - 1))) { - pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; - mutex_lock(mutex); - - while (c > lf_sync->cur_sb_col[r - 1] - nsync) { - pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); - } - pthread_mutex_unlock(mutex); - } -#else - (void)lf_sync; - (void)r; - (void)c; -#endif // CONFIG_MULTITHREAD -} - -static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, - const int sb_cols) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - int cur; - // Only signal when there are enough filtered SB for next row to run. - int sig = 1; - - if (c < sb_cols - 1) { - cur = c; - if (c % nsync) - sig = 0; - } else { - cur = sb_cols + nsync; - } - - if (sig) { - mutex_lock(&lf_sync->mutex_[r]); - - lf_sync->cur_sb_col[r] = cur; - - pthread_cond_signal(&lf_sync->cond_[r]); - pthread_mutex_unlock(&lf_sync->mutex_[r]); - } -#else - (void)lf_sync; - (void)r; - (void)c; - (void)sb_cols; -#endif // CONFIG_MULTITHREAD -} - -// Implement row loopfiltering for each thread. -static INLINE -void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, - VP9_COMMON *const cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only, - VP9LfSync *const lf_sync) { - const int num_planes = y_only ? 1 : MAX_MB_PLANE; - const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; - int mi_row, mi_col; - enum lf_path path; - if (y_only) - path = LF_PATH_444; - else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) - path = LF_PATH_420; - else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) - path = LF_PATH_444; - else - path = LF_PATH_SLOW; - - for (mi_row = start; mi_row < stop; - mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { - MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); - - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { - const int r = mi_row >> MI_BLOCK_SIZE_LOG2; - const int c = mi_col >> MI_BLOCK_SIZE_LOG2; - int plane; - - sync_read(lf_sync, r, c); - - vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - - vp9_adjust_mask(cm, mi_row, mi_col, lfm); - - vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); - for (plane = 1; plane < num_planes; ++plane) { - switch (path) { - case LF_PATH_420: - vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_444: - vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_SLOW: - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); - break; - } - } - - sync_write(lf_sync, r, c, sb_cols); - } - } -} - -// Row-based multi-threaded loopfilter hook -static int loop_filter_row_worker(VP9LfSync *const lf_sync, - LFWorkerData *const lf_data) { - thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only, - lf_sync); - return 1; -} - -static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, - VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only, - VPxWorker *workers, int nworkers, - VP9LfSync *lf_sync) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - // Number of superblock rows and cols - const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - // Decoder may allocate more threads than number of tiles based on user's - // input. - const int tile_cols = 1 << cm->log2_tile_cols; - const int num_workers = VPXMIN(nworkers, tile_cols); - int i; - - if (!lf_sync->sync_range || sb_rows != lf_sync->rows || - num_workers > lf_sync->num_workers) { - vp9_loop_filter_dealloc(lf_sync); - vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); - } - - // Initialize cur_sb_col to -1 for all SB rows. - memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); - - // Set up loopfilter thread data. - // The decoder is capping num_workers because it has been observed that using - // more threads on the loopfilter than there are cores will hurt performance - // on Android. This is because the system will only schedule the tile decode - // workers on cores equal to the number of tile columns. Then if the decoder - // tries to use more threads for the loopfilter, it will hurt performance - // because of contention. If the multithreading code changes in the future - // then the number of workers used by the loopfilter should be revisited. - for (i = 0; i < num_workers; ++i) { - VPxWorker *const worker = &workers[i]; - LFWorkerData *const lf_data = &lf_sync->lfdata[i]; - - worker->hook = (VPxWorkerHook)loop_filter_row_worker; - worker->data1 = lf_sync; - worker->data2 = lf_data; - - // Loopfilter data - vp9_loop_filter_data_reset(lf_data, frame, cm, planes); - lf_data->start = start + i * MI_BLOCK_SIZE; - lf_data->stop = stop; - lf_data->y_only = y_only; - - // Start loopfiltering - if (i == num_workers - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - // Wait till all rows are finished - for (i = 0; i < num_workers; ++i) { - winterface->sync(&workers[i]); - } -} - -void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int frame_filter_level, - int y_only, int partial_frame, - VPxWorker *workers, int num_workers, - VP9LfSync *lf_sync) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - - if (!frame_filter_level) return; - - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - vp9_loop_filter_frame_init(cm, frame_filter_level); - - loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, - y_only, workers, num_workers, lf_sync); -} - -// Set up nsync by width. -static INLINE int get_sync_range(int width) { - // nsync numbers are picked by testing. For example, for 4k - // video, using 4 gives best performance. - if (width < 640) - return 1; - else if (width <= 1280) - return 2; - else if (width <= 4096) - return 4; - else - return 8; -} - -// Allocate memory for lf row synchronization -void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, - int width, int num_workers) { - lf_sync->rows = rows; -#if CONFIG_MULTITHREAD - { - int i; - - CHECK_MEM_ERROR(cm, lf_sync->mutex_, - vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); - if (lf_sync->mutex_) { - for (i = 0; i < rows; ++i) { - pthread_mutex_init(&lf_sync->mutex_[i], NULL); - } - } - - CHECK_MEM_ERROR(cm, lf_sync->cond_, - vpx_malloc(sizeof(*lf_sync->cond_) * rows)); - if (lf_sync->cond_) { - for (i = 0; i < rows; ++i) { - pthread_cond_init(&lf_sync->cond_[i], NULL); - } - } - } -#endif // CONFIG_MULTITHREAD - - CHECK_MEM_ERROR(cm, lf_sync->lfdata, - vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); - lf_sync->num_workers = num_workers; - - CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, - vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); - - // Set up nsync. - lf_sync->sync_range = get_sync_range(width); -} - -// Deallocate lf synchronization related mutex and data -void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { - if (lf_sync != NULL) { -#if CONFIG_MULTITHREAD - int i; - - if (lf_sync->mutex_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->mutex_[i]); - } - vpx_free(lf_sync->mutex_); - } - if (lf_sync->cond_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->cond_[i]); - } - vpx_free(lf_sync->cond_); - } -#endif // CONFIG_MULTITHREAD - vpx_free(lf_sync->lfdata); - vpx_free(lf_sync->cur_sb_col); - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. - vp9_zero(*lf_sync); - } -} - -// Accumulate frame counts. -void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, - const FRAME_COUNTS *counts, int is_dec) { - int i, j, k, l, m; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - for (j = 0; j < INTRA_MODES; j++) - accum->y_mode[i][j] += counts->y_mode[i][j]; - - for (i = 0; i < INTRA_MODES; i++) - for (j = 0; j < INTRA_MODES; j++) - accum->uv_mode[i][j] += counts->uv_mode[i][j]; - - for (i = 0; i < PARTITION_CONTEXTS; i++) - for (j = 0; j < PARTITION_TYPES; j++) - accum->partition[i][j] += counts->partition[i][j]; - - if (is_dec) { - int n; - for (i = 0; i < TX_SIZES; i++) - for (j = 0; j < PLANE_TYPES; j++) - for (k = 0; k < REF_TYPES; k++) - for (l = 0; l < COEF_BANDS; l++) - for (m = 0; m < COEFF_CONTEXTS; m++) { - accum->eob_branch[i][j][k][l][m] += - counts->eob_branch[i][j][k][l][m]; - for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - accum->coef[i][j][k][l][m][n] += - counts->coef[i][j][k][l][m][n]; - } - } else { - for (i = 0; i < TX_SIZES; i++) - for (j = 0; j < PLANE_TYPES; j++) - for (k = 0; k < REF_TYPES; k++) - for (l = 0; l < COEF_BANDS; l++) - for (m = 0; m < COEFF_CONTEXTS; m++) - accum->eob_branch[i][j][k][l][m] += - counts->eob_branch[i][j][k][l][m]; - // In the encoder, coef is only updated at frame - // level, so not need to accumulate it here. - // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - // accum->coef[i][j][k][l][m][n] += - // counts->coef[i][j][k][l][m][n]; - } - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - for (j = 0; j < SWITCHABLE_FILTERS; j++) - accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) - for (j = 0; j < INTER_MODES; j++) - accum->inter_mode[i][j] += counts->inter_mode[i][j]; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->intra_inter[i][j] += counts->intra_inter[i][j]; - - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->comp_inter[i][j] += counts->comp_inter[i][j]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) - accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->comp_ref[i][j] += counts->comp_ref[i][j]; - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) - accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; - - for (j = 0; j < TX_SIZES - 1; j++) - accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; - - for (j = 0; j < TX_SIZES - 2; j++) - accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; - } - - for (i = 0; i < TX_SIZES; i++) - accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; - - for (i = 0; i < SKIP_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->skip[i][j] += counts->skip[i][j]; - - for (i = 0; i < MV_JOINTS; i++) - accum->mv.joints[i] += counts->mv.joints[i]; - - for (k = 0; k < 2; k++) { - nmv_component_counts *const comps = &accum->mv.comps[k]; - const nmv_component_counts *const comps_t = &counts->mv.comps[k]; - - for (i = 0; i < 2; i++) { - comps->sign[i] += comps_t->sign[i]; - comps->class0_hp[i] += comps_t->class0_hp[i]; - comps->hp[i] += comps_t->hp[i]; - } - - for (i = 0; i < MV_CLASSES; i++) - comps->classes[i] += comps_t->classes[i]; - - for (i = 0; i < CLASS0_SIZE; i++) { - comps->class0[i] += comps_t->class0[i]; - for (j = 0; j < MV_FP_SIZE; j++) - comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; - } - - for (i = 0; i < MV_OFFSET_BITS; i++) - for (j = 0; j < 2; j++) - comps->bits[i][j] += comps_t->bits[i][j]; - - for (i = 0; i < MV_FP_SIZE; i++) - comps->fp[i] += comps_t->fp[i]; - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.h b/thirdparty/libvpx/vp9/common/vp9_thread_common.h deleted file mode 100644 index b3b60c253f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_thread_common.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_ -#define VP9_COMMON_VP9_THREAD_COMMON_H_ -#include "./vpx_config.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vpx_util/vpx_thread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct FRAME_COUNTS; - -// Loopfilter row synchronization -typedef struct VP9LfSyncData { -#if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_; - pthread_cond_t *cond_; -#endif - // Allocate memory to store the loop-filtered superblock index in each row. - int *cur_sb_col; - // The optimal sync_range for different resolution and platform should be - // determined by testing. Currently, it is chosen to be a power-of-2 number. - int sync_range; - int rows; - - // Row-based parallel loopfilter data - LFWorkerData *lfdata; - int num_workers; -} VP9LfSync; - -// Allocate memory for loopfilter row synchronization. -void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows, - int width, int num_workers); - -// Deallocate loopfilter synchronization related mutex and data. -void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); - -// Multi-threaded loopfilter that uses the tile threads. -void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - struct VP9Common *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int frame_filter_level, - int y_only, int partial_frame, - VPxWorker *workers, int num_workers, - VP9LfSync *lf_sync); - -void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, - const struct FRAME_COUNTS *counts, int is_dec); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_THREAD_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.c b/thirdparty/libvpx/vp9/common/vp9_tile_common.c deleted file mode 100644 index 9fcb97c854..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_tile_common.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_tile_common.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vpx_dsp/vpx_dsp_common.h" - -#define MIN_TILE_WIDTH_B64 4 -#define MAX_TILE_WIDTH_B64 64 - -static int get_tile_offset(int idx, int mis, int log2) { - const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; - const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; - return VPXMIN(offset, mis); -} - -void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { - tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); - tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); -} - -void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) { - tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); - tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); -} - -void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) { - vp9_tile_set_row(tile, cm, row); - vp9_tile_set_col(tile, cm, col); -} - -static int get_min_log2_tile_cols(const int sb64_cols) { - int min_log2 = 0; - while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) - ++min_log2; - return min_log2; -} - -static int get_max_log2_tile_cols(const int sb64_cols) { - int max_log2 = 1; - while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) - ++max_log2; - return max_log2 - 1; -} - -void vp9_get_tile_n_bits(int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols) { - const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; - *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); - *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); - assert(*min_log2_tile_cols <= *max_log2_tile_cols); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.h b/thirdparty/libvpx/vp9/common/vp9_tile_common.h deleted file mode 100644 index ae58805de1..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_tile_common.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_TILE_COMMON_H_ -#define VP9_COMMON_VP9_TILE_COMMON_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; - -typedef struct TileInfo { - int mi_row_start, mi_row_end; - int mi_col_start, mi_col_end; -} TileInfo; - -// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on -// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' -void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, - int row, int col); - -void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row); -void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col); - -void vp9_get_tile_n_bits(int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c deleted file mode 100644 index 1c77b57ff1..0000000000 --- a/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp9_rtcd.h" -#include "vpx_dsp/x86/inv_txfm_sse2.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" -#include "vpx_ports/mem.h" - -void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - __m128i in[2]; - const __m128i zero = _mm_setzero_si128(); - const __m128i eight = _mm_set1_epi16(8); - - in[0] = load_input_data(input); - in[1] = load_input_data(input + 8); - - switch (tx_type) { - case 0: // DCT_DCT - idct4_sse2(in); - idct4_sse2(in); - break; - case 1: // ADST_DCT - idct4_sse2(in); - iadst4_sse2(in); - break; - case 2: // DCT_ADST - iadst4_sse2(in); - idct4_sse2(in); - break; - case 3: // ADST_ADST - iadst4_sse2(in); - iadst4_sse2(in); - break; - default: - assert(0); - break; - } - - // Final round and shift - in[0] = _mm_add_epi16(in[0], eight); - in[1] = _mm_add_epi16(in[1], eight); - - in[0] = _mm_srai_epi16(in[0], 4); - in[1] = _mm_srai_epi16(in[1], 4); - - // Reconstruction and Store - { - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); - __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *)(dest + stride))); - d2 = _mm_unpacklo_epi32( - d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3))); - d0 = _mm_unpacklo_epi8(d0, zero); - d2 = _mm_unpacklo_epi8(d2, zero); - d0 = _mm_add_epi16(d0, in[0]); - d2 = _mm_add_epi16(d2, in[1]); - d0 = _mm_packus_epi16(d0, d2); - // store result[0] - *(int *)dest = _mm_cvtsi128_si32(d0); - // store result[1] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); - // store result[2] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); - // store result[3] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); - } -} - -void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - __m128i in[8]; - const __m128i zero = _mm_setzero_si128(); - const __m128i final_rounding = _mm_set1_epi16(1 << 4); - - // load input data - in[0] = load_input_data(input); - in[1] = load_input_data(input + 8 * 1); - in[2] = load_input_data(input + 8 * 2); - in[3] = load_input_data(input + 8 * 3); - in[4] = load_input_data(input + 8 * 4); - in[5] = load_input_data(input + 8 * 5); - in[6] = load_input_data(input + 8 * 6); - in[7] = load_input_data(input + 8 * 7); - - switch (tx_type) { - case 0: // DCT_DCT - idct8_sse2(in); - idct8_sse2(in); - break; - case 1: // ADST_DCT - idct8_sse2(in); - iadst8_sse2(in); - break; - case 2: // DCT_ADST - iadst8_sse2(in); - idct8_sse2(in); - break; - case 3: // ADST_ADST - iadst8_sse2(in); - iadst8_sse2(in); - break; - default: - assert(0); - break; - } - - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 5); - in[1] = _mm_srai_epi16(in[1], 5); - in[2] = _mm_srai_epi16(in[2], 5); - in[3] = _mm_srai_epi16(in[3], 5); - in[4] = _mm_srai_epi16(in[4], 5); - in[5] = _mm_srai_epi16(in[5], 5); - in[6] = _mm_srai_epi16(in[6], 5); - in[7] = _mm_srai_epi16(in[7], 5); - - RECON_AND_STORE(dest + 0 * stride, in[0]); - RECON_AND_STORE(dest + 1 * stride, in[1]); - RECON_AND_STORE(dest + 2 * stride, in[2]); - RECON_AND_STORE(dest + 3 * stride, in[3]); - RECON_AND_STORE(dest + 4 * stride, in[4]); - RECON_AND_STORE(dest + 5 * stride, in[5]); - RECON_AND_STORE(dest + 6 * stride, in[6]); - RECON_AND_STORE(dest + 7 * stride, in[7]); -} - -void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride, int tx_type) { - __m128i in0[16], in1[16]; - - load_buffer_8x16(input, in0); - input += 8; - load_buffer_8x16(input, in1); - - switch (tx_type) { - case 0: // DCT_DCT - idct16_sse2(in0, in1); - idct16_sse2(in0, in1); - break; - case 1: // ADST_DCT - idct16_sse2(in0, in1); - iadst16_sse2(in0, in1); - break; - case 2: // DCT_ADST - iadst16_sse2(in0, in1); - idct16_sse2(in0, in1); - break; - case 3: // ADST_ADST - iadst16_sse2(in0, in1); - iadst16_sse2(in0, in1); - break; - default: - assert(0); - break; - } - - write_buffer_8x16(dest, in0, stride); - dest += 8; - write_buffer_8x16(dest, in1, stride); -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c deleted file mode 100644 index d63912932c..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c +++ /dev/null @@ -1,2271 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include // qsort() - -#include "./vp9_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vpx_dsp/bitreader_buffer.h" -#include "vpx_dsp/bitreader.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/mem_ops.h" -#include "vpx_scale/vpx_scale.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_idct.h" -#include "vp9/common/vp9_thread_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_tile_common.h" - -#include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_detokenize.h" -#include "vp9/decoder/vp9_decodemv.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/decoder/vp9_dsubexp.h" - -#define MAX_VP9_HEADER_SIZE 80 - -static int is_compound_reference_allowed(const VP9_COMMON *cm) { - int i; - for (i = 1; i < REFS_PER_FRAME; ++i) - if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) - return 1; - - return 0; -} - -static void setup_compound_reference_mode(VP9_COMMON *cm) { - if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) { - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; - } else if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[ALTREF_FRAME]) { - cm->comp_fixed_ref = GOLDEN_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } else { - cm->comp_fixed_ref = LAST_FRAME; - cm->comp_var_ref[0] = GOLDEN_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } -} - -static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { - return len != 0 && len <= (size_t)(end - start); -} - -static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) { - const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max)); - return data > max ? max : data; -} - -static TX_MODE read_tx_mode(vpx_reader *r) { - TX_MODE tx_mode = vpx_read_literal(r, 2); - if (tx_mode == ALLOW_32X32) - tx_mode += vpx_read_bit(r); - return tx_mode; -} - -static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) { - int i, j; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 3; ++j) - vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 2; ++j) - vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 1; ++j) - vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); -} - -static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) { - int i, j; - for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) - for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) - vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); -} - -static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { - int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - for (j = 0; j < INTER_MODES - 1; ++j) - vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); -} - -static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm, - vpx_reader *r) { - if (is_compound_reference_allowed(cm)) { - return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT - : COMPOUND_REFERENCE) - : SINGLE_REFERENCE; - } else { - return SINGLE_REFERENCE; - } -} - -static void read_frame_reference_mode_probs(VP9_COMMON *cm, vpx_reader *r) { - FRAME_CONTEXT *const fc = cm->fc; - int i; - - if (cm->reference_mode == REFERENCE_MODE_SELECT) - for (i = 0; i < COMP_INTER_CONTEXTS; ++i) - vp9_diff_update_prob(r, &fc->comp_inter_prob[i]); - - if (cm->reference_mode != COMPOUND_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) { - vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]); - vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]); - } - - if (cm->reference_mode != SINGLE_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) - vp9_diff_update_prob(r, &fc->comp_ref_prob[i]); -} - -static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { - int i; - for (i = 0; i < n; ++i) - if (vpx_read(r, MV_UPDATE_PROB)) - p[i] = (vpx_read_literal(r, 7) << 1) | 1; -} - -static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { - int i, j; - - update_mv_probs(ctx->joints, MV_JOINTS - 1, r); - - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - update_mv_probs(&comp_ctx->sign, 1, r); - update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); - update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); - update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); - } - - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - for (j = 0; j < CLASS0_SIZE; ++j) - update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r); - update_mv_probs(comp_ctx->fp, 3, r); - } - - if (allow_hp) { - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - update_mv_probs(&comp_ctx->class0_hp, 1, r); - update_mv_probs(&comp_ctx->hp, 1, r); - } - } -} - -static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, - const TX_SIZE tx_size, - uint8_t *dst, int stride, - int eob) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const dqcoeff = pd->dqcoeff; - assert(eob > 0); -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } - } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_idct4x4_add(dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } - } -#else - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_idct4x4_add(dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } -} - -static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, - const TX_TYPE tx_type, - const TX_SIZE tx_size, - uint8_t *dst, int stride, - int eob) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const dqcoeff = pd->dqcoeff; - assert(eob > 0); -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } - } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } - } -#else - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } -} - -static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, - vpx_reader *r, - MODE_INFO *const mi, - int plane, - int row, int col, - TX_SIZE tx_size) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; - uint8_t *dst; - dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; - - if (mi->sb_type < BLOCK_8X8) - if (plane == 0) - mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; - - vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, - dst, pd->dst.stride, dst, pd->dst.stride, - col, row, plane); - - if (!mi->skip) { - const TX_TYPE tx_type = (plane || xd->lossless) ? - DCT_DCT : intra_mode_to_tx_type_lookup[mode]; - const scan_order *sc = (plane || xd->lossless) ? - &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; - const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, - r, mi->segment_id); - if (eob > 0) { - inverse_transform_block_intra(xd, plane, tx_type, tx_size, - dst, pd->dst.stride, eob); - } - } -} - -static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, - MODE_INFO *const mi, int plane, - int row, int col, TX_SIZE tx_size) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const scan_order *sc = &vp9_default_scan_orders[tx_size]; - const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, - mi->segment_id); - - if (eob > 0) { - inverse_transform_block_inter( - xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], - pd->dst.stride, eob); - } - return eob; -} - -static void build_mc_border(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, int w, int h) { - // Get a pointer to the start of the real data for this row. - const uint8_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? -x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - memset(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy); - - if (right) - memset(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void high_build_mc_border(const uint8_t *src8, int src_stride, - uint16_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, - int w, int h) { - // Get a pointer to the start of the real data for this row. - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - const uint16_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? -x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - vpx_memset16(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); - - if (right) - vpx_memset16(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if CONFIG_VP9_HIGHBITDEPTH -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, - int border_offset, - uint8_t *const dst, int dst_buf_stride, - int subpel_x, int subpel_y, - const InterpKernel *kernel, - const struct scale_factors *sf, - MACROBLOCKD *xd, - int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); - const uint8_t *buf_ptr; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); - buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; - } else { - build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); - buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; - } - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -} -#else -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, - int border_offset, - uint8_t *const dst, int dst_buf_stride, - int subpel_x, int subpel_y, - const InterpKernel *kernel, - const struct scale_factors *sf, - int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); - const uint8_t *buf_ptr; - - build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); - buf_ptr = mc_buf + border_offset; - - inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd, - int plane, int bw, int bh, int x, - int y, int w, int h, int mi_x, int mi_y, - const InterpKernel *kernel, - const struct scale_factors *sf, - struct buf_2d *pre_buf, - struct buf_2d *dst_buf, const MV* mv, - RefCntBuffer *ref_frame_buf, - int is_scaled, int ref) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - MV32 scaled_mv; - int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, - buf_stride, subpel_x, subpel_y; - uint8_t *ref_frame, *buf_ptr; - - // Get reference frame pointer, width and height. - if (plane == 0) { - frame_width = ref_frame_buf->buf.y_crop_width; - frame_height = ref_frame_buf->buf.y_crop_height; - ref_frame = ref_frame_buf->buf.y_buffer; - } else { - frame_width = ref_frame_buf->buf.uv_crop_width; - frame_height = ref_frame_buf->buf.uv_crop_height; - ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer - : ref_frame_buf->buf.v_buffer; - } - - if (is_scaled) { - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); - // Co-ordinate of containing block to pixel precision. - int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); -#if CONFIG_BETTER_HW_COMPATIBILITY - assert(xd->mi[0]->sb_type != BLOCK_4X8 && - xd->mi[0]->sb_type != BLOCK_8X4); - assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && - mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x))); -#endif - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = (x_start + x) << SUBPEL_BITS; - y0_16 = (y_start + y) << SUBPEL_BITS; - - // Co-ordinate of current block in reference frame - // to 1/16th pixel precision. - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); - - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x_start + x, sf); - y0 = sf->scale_value_y(y_start + y, sf); - - // Scale the MV and incorporate the sub-pixel offset of the block - // in the reference frame. - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - // Co-ordinate of containing block to pixel precision. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; - - scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); - scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); - xs = ys = 16; - } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - - // Calculate the top left corner of the best matching block in the - // reference frame. - x0 += scaled_mv.col >> SUBPEL_BITS; - y0 += scaled_mv.row >> SUBPEL_BITS; - x0_16 += scaled_mv.col; - y0_16 += scaled_mv.row; - - // Get reference block pointer. - buf_ptr = ref_frame + y0 * pre_buf->stride + x0; - buf_stride = pre_buf->stride; - - // Do border extension if there is motion or the - // width/height is not a multiple of 8 pixels. - if (is_scaled || scaled_mv.col || scaled_mv.row || - (frame_width & 0x7) || (frame_height & 0x7)) { - int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; - - // Get reference block bottom right horizontal coordinate. - int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; - int x_pad = 0, y_pad = 0; - - if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { - x0 -= VP9_INTERP_EXTEND - 1; - x1 += VP9_INTERP_EXTEND; - x_pad = 1; - } - - if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { - y0 -= VP9_INTERP_EXTEND - 1; - y1 += VP9_INTERP_EXTEND; - y_pad = 1; - } - - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (worker != NULL) - vp9_frameworker_wait(worker, ref_frame_buf, - VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); - - // Skip border extension if block is inside the frame. - if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || - y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { - // Extend the border. - const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; - const int b_w = x1 - x0 + 1; - const int b_h = y1 - y0 + 1; - const int border_offset = y_pad * 3 * b_w + x_pad * 3; - - extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, - frame_width, frame_height, border_offset, - dst, dst_buf->stride, - subpel_x, subpel_y, - kernel, sf, -#if CONFIG_VP9_HIGHBITDEPTH - xd, -#endif - w, h, ref, xs, ys); - return; - } - } else { - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (worker != NULL) { - const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; - vp9_frameworker_wait(worker, ref_frame_buf, - VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); - } - } -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH -} - -static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, - MACROBLOCKD *xd, - int mi_row, int mi_col) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - const MODE_INFO *mi = xd->mi[0]; - const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; - const BLOCK_SIZE sb_type = mi->sb_type; - const int is_compound = has_second_ref(mi); - int ref; - int is_scaled; - VPxWorker *const fwo = pbi->frame_parallel_decode ? - pbi->frame_worker_owner : NULL; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; - RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME]; - const struct scale_factors *const sf = &ref_buf->sf; - const int idx = ref_buf->idx; - BufferPool *const pool = pbi->common.buffer_pool; - RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; - - if (!vp9_is_valid_scale(sf)) - vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, - "Reference frame has invalid dimensions"); - - is_scaled = vp9_is_scaled(sf); - vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, - is_scaled ? sf : NULL); - xd->block_refs[ref] = ref_buf; - - if (sb_type < BLOCK_8X8) { - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int n4w_x4 = 4 * num_4x4_w; - const int n4h_x4 = 4 * num_4x4_h; - struct buf_2d *const pre_buf = &pd->pre[ref]; - int i = 0, x, y; - for (y = 0; y < num_4x4_h; ++y) { - for (x = 0; x < num_4x4_w; ++x) { - const MV mv = average_split_mvs(pd, mi, ref, i++); - dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, - 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, - ref_frame_buf, is_scaled, ref); - } - } - } - } else { - const MV mv = mi->mv[ref].as_mv; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int n4w_x4 = 4 * num_4x4_w; - const int n4h_x4 = 4 * num_4x4_h; - struct buf_2d *const pre_buf = &pd->pre[ref]; - dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, - 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, - ref_frame_buf, is_scaled, ref); - } - } - } -} - -static INLINE TX_SIZE dec_get_uv_tx_size(const MODE_INFO *mi, - int n4_wl, int n4_hl) { - // get minimum log2 num4x4s dimension - const int x = VPXMIN(n4_wl, n4_hl); - return VPXMIN(mi->tx_size, x); -} - -static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w); - memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h); - } -} - -static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, - int bhl) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x; - xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y; - xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x; - xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y; - } -} - -static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int bw, int bh, int x_mis, int y_mis, - int bwl, int bhl) { - const int offset = mi_row * cm->mi_stride + mi_col; - int x, y; - const TileInfo *const tile = &xd->tile; - - xd->mi = cm->mi_grid_visible + offset; - xd->mi[0] = &cm->mi[offset]; - // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of - // passing bsize from decode_partition(). - xd->mi[0]->sb_type = bsize; - for (y = 0; y < y_mis; ++y) - for (x = !y; x < x_mis; ++x) { - xd->mi[y * cm->mi_stride + x] = xd->mi[0]; - } - - set_plane_n4(xd, bw, bh, bwl, bhl); - - set_skip_context(xd, mi_row, mi_col); - - // Distance of Mb to the various image edges. These are specified to 8th pel - // as they are always compared to values that are in 1/8th pel units - set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - - vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); - return xd->mi[0]; -} - -static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, - int mi_row, int mi_col, - vpx_reader *r, BLOCK_SIZE bsize, - int bwl, int bhl) { - VP9_COMMON *const cm = &pbi->common; - const int less8x8 = bsize < BLOCK_8X8; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); - const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); - - MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, - bw, bh, x_mis, y_mis, bwl, bhl); - - if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { - const BLOCK_SIZE uv_subsize = - ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; - if (uv_subsize == BLOCK_INVALID) - vpx_internal_error(xd->error_info, - VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); - } - - vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - - if (mi->skip) { - dec_reset_skip_context(xd); - } - - if (!is_inter_block(mi)) { - int plane; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) - : mi->tx_size; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int step = (1 << tx_size); - int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? - 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; - xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; - - for (row = 0; row < max_blocks_high; row += step) - for (col = 0; col < max_blocks_wide; col += step) - predict_and_reconstruct_intra_block(xd, r, mi, plane, - row, col, tx_size); - } - } else { - // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); - - // Reconstruction - if (!mi->skip) { - int eobtotal = 0; - int plane; - - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) - : mi->tx_size; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int step = (1 << tx_size); - int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? - 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; - xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; - - for (row = 0; row < max_blocks_high; row += step) - for (col = 0; col < max_blocks_wide; col += step) - eobtotal += reconstruct_inter_block(xd, r, mi, plane, row, col, - tx_size); - } - - if (!less8x8 && eobtotal == 0) - mi->skip = 1; // skip loopfilter - } - } - - xd->corrupted |= vpx_reader_has_error(r); - - if (cm->lf.filter_level) { - vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); - } -} - -static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, - int mi_row, int mi_col, - int bsl) { - const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; - -// assert(bsl >= 0); - - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; -} - -static INLINE void dec_update_partition_context(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE subsize, - int bw) { - PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - - // update the partition context at the end notes. set partition bits - // of block sizes larger than the current one to be one, and partition - // bits of smaller block sizes to be zero. - memset(above_ctx, partition_context_lookup[subsize].above, bw); - memset(left_ctx, partition_context_lookup[subsize].left, bw); -} - -static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, - vpx_reader *r, - int has_rows, int has_cols, int bsl) { - const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl); - const vpx_prob *const probs = get_partition_probs(xd, ctx); - FRAME_COUNTS *counts = xd->counts; - PARTITION_TYPE p; - - if (has_rows && has_cols) - p = (PARTITION_TYPE)vpx_read_tree(r, vp9_partition_tree, probs); - else if (!has_rows && has_cols) - p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; - else if (has_rows && !has_cols) - p = vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT; - else - p = PARTITION_SPLIT; - - if (counts) - ++counts->partition[ctx][p]; - - return p; -} - -// TODO(slavarnway): eliminate bsize and subsize in future commits -static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, - int mi_row, int mi_col, - vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { - VP9_COMMON *const cm = &pbi->common; - const int n8x8_l2 = n4x4_l2 - 1; - const int num_8x8_wh = 1 << n8x8_l2; - const int hbs = num_8x8_wh >> 1; - PARTITION_TYPE partition; - BLOCK_SIZE subsize; - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - partition = read_partition(xd, mi_row, mi_col, r, has_rows, has_cols, - n8x8_l2); - subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); - if (!hbs) { - // calculate bmode block dimensions (log 2) - xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); - xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); - decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1); - } else { - switch (partition) { - case PARTITION_NONE: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); - break; - case PARTITION_HORZ: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); - if (has_rows) - decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2, - n8x8_l2); - break; - case PARTITION_VERT: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); - if (has_cols) - decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2, - n4x4_l2); - break; - case PARTITION_SPLIT: - decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize, - n8x8_l2); - break; - default: - assert(0 && "Invalid partition type"); - } - } - - // update partition context - if (bsize >= BLOCK_8X8 && - (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); -} - -static void setup_token_decoder(const uint8_t *data, - const uint8_t *data_end, - size_t read_size, - struct vpx_internal_error_info *error_info, - vpx_reader *r, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // Validate the calculated partition length. If the buffer - // described by the partition can't be fully read, then restrict - // it to the portion that can be (for EC mode) or throw an error. - if (!read_is_valid(data, read_size, data_end)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); - - if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) - vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder %d", 1); -} - -static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, - vpx_reader *r) { - int i, j, k, l, m; - - if (vpx_read_bit(r)) - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) - for (m = 0; m < UNCONSTRAINED_NODES; ++m) - vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); -} - -static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, - vpx_reader *r) { - const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; - TX_SIZE tx_size; - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - read_coef_probs_common(fc->coef_probs[tx_size], r); -} - -static void setup_segmentation(struct segmentation *seg, - struct vpx_read_bit_buffer *rb) { - int i, j; - - seg->update_map = 0; - seg->update_data = 0; - - seg->enabled = vpx_rb_read_bit(rb); - if (!seg->enabled) - return; - - // Segmentation map update - seg->update_map = vpx_rb_read_bit(rb); - if (seg->update_map) { - for (i = 0; i < SEG_TREE_PROBS; i++) - seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) - : MAX_PROB; - - seg->temporal_update = vpx_rb_read_bit(rb); - if (seg->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) - seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) - : MAX_PROB; - } else { - for (i = 0; i < PREDICTION_PROBS; i++) - seg->pred_probs[i] = MAX_PROB; - } - } - - // Segmentation data update - seg->update_data = vpx_rb_read_bit(rb); - if (seg->update_data) { - seg->abs_delta = vpx_rb_read_bit(rb); - - vp9_clearall_segfeatures(seg); - - for (i = 0; i < MAX_SEGMENTS; i++) { - for (j = 0; j < SEG_LVL_MAX; j++) { - int data = 0; - const int feature_enabled = vpx_rb_read_bit(rb); - if (feature_enabled) { - vp9_enable_segfeature(seg, i, j); - data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); - if (vp9_is_segfeature_signed(j)) - data = vpx_rb_read_bit(rb) ? -data : data; - } - vp9_set_segdata(seg, i, j, data); - } - } - } -} - -static void setup_loopfilter(struct loopfilter *lf, - struct vpx_read_bit_buffer *rb) { - lf->filter_level = vpx_rb_read_literal(rb, 6); - lf->sharpness_level = vpx_rb_read_literal(rb, 3); - - // Read in loop filter deltas applied at the MB level based on mode or ref - // frame. - lf->mode_ref_delta_update = 0; - - lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb); - if (lf->mode_ref_delta_enabled) { - lf->mode_ref_delta_update = vpx_rb_read_bit(rb); - if (lf->mode_ref_delta_update) { - int i; - - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - if (vpx_rb_read_bit(rb)) - lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6); - - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - if (vpx_rb_read_bit(rb)) - lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6); - } - } -} - -static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { - return vpx_rb_read_bit(rb) ? vpx_rb_read_signed_literal(rb, 4) : 0; -} - -static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, - struct vpx_read_bit_buffer *rb) { - cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS); - cm->y_dc_delta_q = read_delta_q(rb); - cm->uv_dc_delta_q = read_delta_q(rb); - cm->uv_ac_delta_q = read_delta_q(rb); - cm->dequant_bit_depth = cm->bit_depth; - xd->lossless = cm->base_qindex == 0 && - cm->y_dc_delta_q == 0 && - cm->uv_dc_delta_q == 0 && - cm->uv_ac_delta_q == 0; - -#if CONFIG_VP9_HIGHBITDEPTH - xd->bd = (int)cm->bit_depth; -#endif -} - -static void setup_segmentation_dequant(VP9_COMMON *const cm) { - // Build y/uv dequant values based on segmentation. - if (cm->seg.enabled) { - int i; - for (i = 0; i < MAX_SEGMENTS; ++i) { - const int qindex = vp9_get_qindex(&cm->seg, i, cm->base_qindex); - cm->y_dequant[i][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, - cm->bit_depth); - cm->y_dequant[i][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); - cm->uv_dequant[i][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, - cm->bit_depth); - cm->uv_dequant[i][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, - cm->bit_depth); - } - } else { - const int qindex = cm->base_qindex; - // When segmentation is disabled, only the first value is used. The - // remaining are don't cares. - cm->y_dequant[0][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); - cm->y_dequant[0][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); - cm->uv_dequant[0][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, - cm->bit_depth); - cm->uv_dequant[0][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, - cm->bit_depth); - } -} - -static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { - const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR }; - return vpx_rb_read_bit(rb) ? SWITCHABLE - : literal_to_filter[vpx_rb_read_literal(rb, 2)]; -} - -static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - cm->render_width = cm->width; - cm->render_height = cm->height; - if (vpx_rb_read_bit(rb)) - vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); -} - -static void resize_mv_buffer(VP9_COMMON *cm) { - vpx_free(cm->cur_frame->mvs); - cm->cur_frame->mi_rows = cm->mi_rows; - cm->cur_frame->mi_cols = cm->mi_cols; - CHECK_MEM_ERROR(cm, cm->cur_frame->mvs, - (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*cm->cur_frame->mvs))); -} - -static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { -#if CONFIG_SIZE_LIMIT - if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Dimensions of %dx%d beyond allowed size of %dx%d.", - width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); -#endif - if (cm->width != width || cm->height != height) { - const int new_mi_rows = - ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2; - const int new_mi_cols = - ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2; - - // Allocations in vp9_alloc_context_buffers() depend on individual - // dimensions as well as the overall size. - if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { - if (vp9_alloc_context_buffers(cm, width, height)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate context buffers"); - } else { - vp9_set_mb_mi(cm, width, height); - } - vp9_init_context_buffers(cm); - cm->width = width; - cm->height = height; - } - if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows || - cm->mi_cols > cm->cur_frame->mi_cols) { - resize_mv_buffer(cm); - } -} - -static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - int width, height; - BufferPool *const pool = cm->buffer_pool; - vp9_read_frame_size(rb, &width, &height); - resize_context_buffers(cm, width, height); - setup_render_size(cm, rb); - - lock_buffer_pool(pool); - if (vpx_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_DEC_BORDER_IN_PIXELS, - cm->byte_alignment, - &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, - pool->cb_priv)) { - unlock_buffer_pool(pool); - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - unlock_buffer_pool(pool); - - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; - pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; - pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; -} - -static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, - int ref_xss, int ref_yss, - vpx_bit_depth_t this_bit_depth, - int this_xss, int this_yss) { - return ref_bit_depth == this_bit_depth && ref_xss == this_xss && - ref_yss == this_yss; -} - -static void setup_frame_size_with_refs(VP9_COMMON *cm, - struct vpx_read_bit_buffer *rb) { - int width, height; - int found = 0, i; - int has_valid_ref_frame = 0; - BufferPool *const pool = cm->buffer_pool; - for (i = 0; i < REFS_PER_FRAME; ++i) { - if (vpx_rb_read_bit(rb)) { - if (cm->frame_refs[i].idx != INVALID_IDX) { - YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; - width = buf->y_crop_width; - height = buf->y_crop_height; - found = 1; - break; - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Failed to decode frame size"); - } - } - } - - if (!found) - vp9_read_frame_size(rb, &width, &height); - - if (width <= 0 || height <= 0) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame size"); - - // Check to make sure at least one of frames that this frame references - // has valid dimensions. - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX && - valid_ref_frame_size(ref_frame->buf->y_crop_width, - ref_frame->buf->y_crop_height, - width, height)); - } - if (!has_valid_ref_frame) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has invalid size"); - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (ref_frame->idx == INVALID_IDX || - !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, - ref_frame->buf->subsampling_x, - ref_frame->buf->subsampling_y, - cm->bit_depth, - cm->subsampling_x, - cm->subsampling_y)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has incompatible color format"); - } - - resize_context_buffers(cm, width, height); - setup_render_size(cm, rb); - - lock_buffer_pool(pool); - if (vpx_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_DEC_BORDER_IN_PIXELS, - cm->byte_alignment, - &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, - pool->cb_priv)) { - unlock_buffer_pool(pool); - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - unlock_buffer_pool(pool); - - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; - pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; - pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; -} - -static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - int min_log2_tile_cols, max_log2_tile_cols, max_ones; - vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - - // columns - max_ones = max_log2_tile_cols - min_log2_tile_cols; - cm->log2_tile_cols = min_log2_tile_cols; - while (max_ones-- && vpx_rb_read_bit(rb)) - cm->log2_tile_cols++; - - if (cm->log2_tile_cols > 6) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid number of tile columns"); - - // rows - cm->log2_tile_rows = vpx_rb_read_bit(rb); - if (cm->log2_tile_rows) - cm->log2_tile_rows += vpx_rb_read_bit(rb); -} - -// Reads the next tile returning its size and adjusting '*data' accordingly -// based on 'is_last'. -static void get_tile_buffer(const uint8_t *const data_end, - int is_last, - struct vpx_internal_error_info *error_info, - const uint8_t **data, - vpx_decrypt_cb decrypt_cb, void *decrypt_state, - TileBuffer *buf) { - size_t size; - - if (!is_last) { - if (!read_is_valid(*data, 4, data_end)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); - - if (decrypt_cb) { - uint8_t be_data[4]; - decrypt_cb(decrypt_state, *data, be_data, 4); - size = mem_get_be32(be_data); - } else { - size = mem_get_be32(*data); - } - *data += 4; - - if (size > (size_t)(data_end - *data)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile size"); - } else { - size = data_end - *data; - } - - buf->data = *data; - buf->size = size; - - *data += size; -} - -static void get_tile_buffers(VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - int tile_cols, int tile_rows, - TileBuffer (*tile_buffers)[1 << 6]) { - int r, c; - - for (r = 0; r < tile_rows; ++r) { - for (c = 0; c < tile_cols; ++c) { - const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); - TileBuffer *const buf = &tile_buffers[r][c]; - buf->col = c; - get_tile_buffer(data_end, is_last, &pbi->common.error, &data, - pbi->decrypt_cb, pbi->decrypt_state, buf); - } - } -} - -static const uint8_t *decode_tiles(VP9Decoder *pbi, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - TileBuffer tile_buffers[4][1 << 6]; - int tile_row, tile_col; - int mi_row, mi_col; - TileWorkerData *tile_data = NULL; - - if (cm->lf.filter_level && !cm->skip_loop_filter && - pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, - vpx_memalign(32, sizeof(LFWorkerData))); - pbi->lf_worker.hook = (VPxWorkerHook)vp9_loop_filter_worker; - if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Loop filter thread creation failed"); - } - } - - if (cm->lf.filter_level && !cm->skip_loop_filter) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - // Be sure to sync as we might be resuming after a failed frame decode. - winterface->sync(&pbi->lf_worker); - vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm, - pbi->mb.plane); - } - - assert(tile_rows <= 4); - assert(tile_cols <= (1 << 6)); - - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. - memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - - memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_cols); - - vp9_reset_lfm(cm); - - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); - - // Load all tile information into tile_data. - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; - tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; - tile_data->xd = pbi->mb; - tile_data->xd.corrupted = 0; - tile_data->xd.counts = - cm->frame_parallel_decoding_mode ? NULL : &cm->counts; - vp9_zero(tile_data->dqcoeff); - vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); - vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); - } - } - - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - TileInfo tile; - vp9_tile_set_row(&tile, cm, tile_row); - for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; - mi_row += MI_BLOCK_SIZE) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const int col = pbi->inv_tile_order ? - tile_cols - tile_col - 1 : tile_col; - tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; - vp9_tile_set_col(&tile, cm, col); - vp9_zero(tile_data->xd.left_context); - vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, mi_row, - mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); - } - pbi->mb.corrupted |= tile_data->xd.corrupted; - if (pbi->mb.corrupted) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Failed to decode tile data"); - } - // Loopfilter one row. - if (cm->lf.filter_level && !cm->skip_loop_filter) { - const int lf_start = mi_row - MI_BLOCK_SIZE; - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - - // delay the loopfilter by 1 macroblock row. - if (lf_start < 0) continue; - - // decoding has completed: finish up the loop filter in this thread. - if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue; - - winterface->sync(&pbi->lf_worker); - lf_data->start = lf_start; - lf_data->stop = mi_row; - if (pbi->max_threads > 1) { - winterface->launch(&pbi->lf_worker); - } else { - winterface->execute(&pbi->lf_worker); - } - } - // After loopfiltering, the last 7 row pixels in each superblock row may - // still be changed by the longest loopfilter of the next superblock - // row. - if (pbi->frame_parallel_decode) - vp9_frameworker_broadcast(pbi->cur_buf, - mi_row << MI_BLOCK_SIZE_LOG2); - } - } - - // Loopfilter remaining rows in the frame. - if (cm->lf.filter_level && !cm->skip_loop_filter) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - winterface->sync(&pbi->lf_worker); - lf_data->start = lf_data->stop; - lf_data->stop = cm->mi_rows; - winterface->execute(&pbi->lf_worker); - } - - // Get last tile data. - tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; - - if (pbi->frame_parallel_decode) - vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); - return vpx_reader_find_end(&tile_data->bit_reader); -} - -// On entry 'tile_data->data_end' points to the end of the input frame, on exit -// it is updated to reflect the bitreader position of the final tile column if -// present in the tile buffer group or NULL otherwise. -static int tile_worker_hook(TileWorkerData *const tile_data, - VP9Decoder *const pbi) { - TileInfo *volatile tile = &tile_data->xd.tile; - const int final_col = (1 << pbi->common.log2_tile_cols) - 1; - const uint8_t *volatile bit_reader_end = NULL; - volatile int n = tile_data->buf_start; - tile_data->error_info.setjmp = 1; - - if (setjmp(tile_data->error_info.jmp)) { - tile_data->error_info.setjmp = 0; - tile_data->xd.corrupted = 1; - tile_data->data_end = NULL; - return 0; - } - - tile_data->xd.error_info = &tile_data->error_info; - tile_data->xd.corrupted = 0; - - do { - int mi_row, mi_col; - const TileBuffer *const buf = pbi->tile_buffers + n; - vp9_zero(tile_data->dqcoeff); - vp9_tile_init(tile, &pbi->common, 0, buf->col); - setup_token_decoder(buf->data, tile_data->data_end, buf->size, - &tile_data->error_info, &tile_data->bit_reader, - pbi->decrypt_cb, pbi->decrypt_state); - vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff); - - for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; - mi_row += MI_BLOCK_SIZE) { - vp9_zero(tile_data->xd.left_context); - vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, mi_row, mi_col, - &tile_data->bit_reader, BLOCK_64X64, 4); - } - } - - if (buf->col == final_col) { - bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); - } - } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); - - tile_data->data_end = bit_reader_end; - return !tile_data->xd.corrupted; -} - -// sorts in descending order -static int compare_tile_buffers(const void *a, const void *b) { - const TileBuffer *const buf1 = (const TileBuffer*)a; - const TileBuffer *const buf2 = (const TileBuffer*)b; - return (int)(buf2->size - buf1->size); -} - -static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - const uint8_t *bit_reader_end = NULL; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = VPXMIN(pbi->max_threads, tile_cols); - int n; - - assert(tile_cols <= (1 << 6)); - assert(tile_rows == 1); - (void)tile_rows; - - if (pbi->num_tile_workers == 0) { - const int num_threads = pbi->max_threads; - CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); - for (n = 0; n < num_threads; ++n) { - VPxWorker *const worker = &pbi->tile_workers[n]; - ++pbi->num_tile_workers; - - winterface->init(worker); - if (n < num_threads - 1 && !winterface->reset(worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Tile decoder thread creation failed"); - } - } - } - - // Reset tile decoding hook - for (n = 0; n < num_workers; ++n) { - VPxWorker *const worker = &pbi->tile_workers[n]; - TileWorkerData *const tile_data = - &pbi->tile_worker_data[n + pbi->total_tiles]; - winterface->sync(worker); - tile_data->xd = pbi->mb; - tile_data->xd.counts = - cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; - worker->hook = (VPxWorkerHook)tile_worker_hook; - worker->data1 = tile_data; - worker->data2 = pbi; - } - - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. - memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); - memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_mi_cols); - - vp9_reset_lfm(cm); - - // Load tile data into tile_buffers - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, - &pbi->tile_buffers); - - // Sort the buffers based on size in descending order. - qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]), - compare_tile_buffers); - - if (num_workers == tile_cols) { - // Rearrange the tile buffers such that the largest, and - // presumably the most difficult, tile will be decoded in the main thread. - // This should help minimize the number of instances where the main thread - // is waiting for a worker to complete. - const TileBuffer largest = pbi->tile_buffers[0]; - memmove(pbi->tile_buffers, pbi->tile_buffers + 1, - (tile_cols - 1) * sizeof(pbi->tile_buffers[0])); - pbi->tile_buffers[tile_cols - 1] = largest; - } else { - int start = 0, end = tile_cols - 2; - TileBuffer tmp; - - // Interleave the tiles to distribute the load between threads, assuming a - // larger tile implies it is more difficult to decode. - while (start < end) { - tmp = pbi->tile_buffers[start]; - pbi->tile_buffers[start] = pbi->tile_buffers[end]; - pbi->tile_buffers[end] = tmp; - start += 2; - end -= 2; - } - } - - // Initialize thread frame counts. - if (!cm->frame_parallel_decoding_mode) { - for (n = 0; n < num_workers; ++n) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[n].data1; - vp9_zero(tile_data->counts); - } - } - - { - const int base = tile_cols / num_workers; - const int remain = tile_cols % num_workers; - int buf_start = 0; - - for (n = 0; n < num_workers; ++n) { - const int count = base + (remain + n) / num_workers; - VPxWorker *const worker = &pbi->tile_workers[n]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - - tile_data->buf_start = buf_start; - tile_data->buf_end = buf_start + count - 1; - tile_data->data_end = data_end; - buf_start += count; - - worker->had_error = 0; - if (n == num_workers - 1) { - assert(tile_data->buf_end == tile_cols - 1); - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - for (; n > 0; --n) { - VPxWorker *const worker = &pbi->tile_workers[n - 1]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - // TODO(jzern): The tile may have specific error data associated with - // its vpx_internal_error_info which could be propagated to the main info - // in cm. Additionally once the threads have been synced and an error is - // detected, there's no point in continuing to decode tiles. - pbi->mb.corrupted |= !winterface->sync(worker); - if (!bit_reader_end) bit_reader_end = tile_data->data_end; - } - } - - // Accumulate thread frame counts. - if (!cm->frame_parallel_decoding_mode) { - for (n = 0; n < num_workers; ++n) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[n].data1; - vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); - } - } - - assert(bit_reader_end || pbi->mb.corrupted); - return bit_reader_end; -} - -static void error_handler(void *data) { - VP9_COMMON *const cm = (VP9_COMMON *)data; - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); -} - -static void read_bitdepth_colorspace_sampling( - VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - if (cm->profile >= PROFILE_2) { - cm->bit_depth = vpx_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 1; -#endif - } else { - cm->bit_depth = VPX_BITS_8; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 0; -#endif - } - cm->color_space = vpx_rb_read_literal(rb, 3); - if (cm->color_space != VPX_CS_SRGB) { - cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb); - if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { - cm->subsampling_x = vpx_rb_read_bit(rb); - cm->subsampling_y = vpx_rb_read_bit(rb); - if (cm->subsampling_x == 1 && cm->subsampling_y == 1) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "4:2:0 color not supported in profile 1 or 3"); - if (vpx_rb_read_bit(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Reserved bit set"); - } else { - cm->subsampling_y = cm->subsampling_x = 1; - } - } else { - cm->color_range = VPX_CR_FULL_RANGE; - if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { - // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. - // 4:2:2 or 4:4:0 chroma sampling is not allowed. - cm->subsampling_y = cm->subsampling_x = 0; - if (vpx_rb_read_bit(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Reserved bit set"); - } else { - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "4:4:4 color not supported in profile 0 or 2"); - } - } -} - -static size_t read_uncompressed_header(VP9Decoder *pbi, - struct vpx_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - BufferPool *const pool = cm->buffer_pool; - RefCntBuffer *const frame_bufs = pool->frame_bufs; - int i, mask, ref_index = 0; - size_t sz; - - cm->last_frame_type = cm->frame_type; - cm->last_intra_only = cm->intra_only; - - if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame marker"); - - cm->profile = vp9_read_profile(rb); -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->profile >= MAX_PROFILES) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Unsupported bitstream profile"); -#else - if (cm->profile >= PROFILE_2) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Unsupported bitstream profile"); -#endif - - cm->show_existing_frame = vpx_rb_read_bit(rb); - if (cm->show_existing_frame) { - // Show an existing frame directly. - const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)]; - lock_buffer_pool(pool); - if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) { - unlock_buffer_pool(pool); - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Buffer %d does not contain a decoded frame", - frame_to_show); - } - - ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); - unlock_buffer_pool(pool); - pbi->refresh_frame_flags = 0; - cm->lf.filter_level = 0; - cm->show_frame = 1; - - if (pbi->frame_parallel_decode) { - for (i = 0; i < REF_FRAMES; ++i) - cm->next_ref_frame_map[i] = cm->ref_frame_map[i]; - } - return 0; - } - - cm->frame_type = (FRAME_TYPE) vpx_rb_read_bit(rb); - cm->show_frame = vpx_rb_read_bit(rb); - cm->error_resilient_mode = vpx_rb_read_bit(rb); - - if (cm->frame_type == KEY_FRAME) { - if (!vp9_read_sync_code(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - - read_bitdepth_colorspace_sampling(cm, rb); - pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; - - for (i = 0; i < REFS_PER_FRAME; ++i) { - cm->frame_refs[i].idx = INVALID_IDX; - cm->frame_refs[i].buf = NULL; - } - - setup_frame_size(cm, rb); - if (pbi->need_resync) { - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - pbi->need_resync = 0; - } - } else { - cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb); - - cm->reset_frame_context = cm->error_resilient_mode ? - 0 : vpx_rb_read_literal(rb, 2); - - if (cm->intra_only) { - if (!vp9_read_sync_code(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - if (cm->profile > PROFILE_0) { - read_bitdepth_colorspace_sampling(cm, rb); - } else { - // NOTE: The intra-only frame header does not include the specification - // of either the color format or color sub-sampling in profile 0. VP9 - // specifies that the default color format should be YUV 4:2:0 in this - // case (normative). - cm->color_space = VPX_CS_BT_601; - cm->color_range = VPX_CR_STUDIO_RANGE; - cm->subsampling_y = cm->subsampling_x = 1; - cm->bit_depth = VPX_BITS_8; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 0; -#endif - } - - pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(cm, rb); - if (pbi->need_resync) { - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - pbi->need_resync = 0; - } - } else if (pbi->need_resync != 1) { /* Skip if need resync */ - pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); - for (i = 0; i < REFS_PER_FRAME; ++i) { - const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2); - const int idx = cm->ref_frame_map[ref]; - RefBuffer *const ref_frame = &cm->frame_refs[i]; - ref_frame->idx = idx; - ref_frame->buf = &frame_bufs[idx].buf; - cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb); - } - - setup_frame_size_with_refs(cm, rb); - - cm->allow_high_precision_mv = vpx_rb_read_bit(rb); - cm->interp_filter = read_interp_filter(rb); - - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_buf = &cm->frame_refs[i]; -#if CONFIG_VP9_HIGHBITDEPTH - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - ref_buf->buf->y_crop_width, - ref_buf->buf->y_crop_height, - cm->width, cm->height, - cm->use_highbitdepth); -#else - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - ref_buf->buf->y_crop_width, - ref_buf->buf->y_crop_height, - cm->width, cm->height); -#endif - } - } - } -#if CONFIG_VP9_HIGHBITDEPTH - get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; -#endif - get_frame_new_buffer(cm)->color_space = cm->color_space; - get_frame_new_buffer(cm)->color_range = cm->color_range; - get_frame_new_buffer(cm)->render_width = cm->render_width; - get_frame_new_buffer(cm)->render_height = cm->render_height; - - if (pbi->need_resync) { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Keyframe / intra-only frame required to reset decoder" - " state"); - } - - if (!cm->error_resilient_mode) { - cm->refresh_frame_context = vpx_rb_read_bit(rb); - cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb); - if (!cm->frame_parallel_decoding_mode) - vp9_zero(cm->counts); - } else { - cm->refresh_frame_context = 0; - cm->frame_parallel_decoding_mode = 1; - } - - // This flag will be overridden by the call to vp9_setup_past_independence - // below, forcing the use of context 0 for those frame types. - cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); - - // Generate next_ref_frame_map. - lock_buffer_pool(pool); - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - if (mask & 1) { - cm->next_ref_frame_map[ref_index] = cm->new_fb_idx; - ++frame_bufs[cm->new_fb_idx].ref_count; - } else { - cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; - } - // Current thread holds the reference frame. - if (cm->ref_frame_map[ref_index] >= 0) - ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; - ++ref_index; - } - - for (; ref_index < REF_FRAMES; ++ref_index) { - cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; - // Current thread holds the reference frame. - if (cm->ref_frame_map[ref_index] >= 0) - ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; - } - unlock_buffer_pool(pool); - pbi->hold_ref_buf = 1; - - if (frame_is_intra_only(cm) || cm->error_resilient_mode) - vp9_setup_past_independence(cm); - - setup_loopfilter(&cm->lf, rb); - setup_quantization(cm, &pbi->mb, rb); - setup_segmentation(&cm->seg, rb); - setup_segmentation_dequant(cm); - - setup_tile_info(cm, rb); - sz = vpx_rb_read_literal(rb, 16); - - if (sz == 0) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid header size"); - - return sz; -} - -static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, - size_t partition_size) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - FRAME_CONTEXT *const fc = cm->fc; - vpx_reader r; - int k; - - if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb, - pbi->decrypt_state)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - - cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r); - if (cm->tx_mode == TX_MODE_SELECT) - read_tx_mode_probs(&fc->tx_probs, &r); - read_coef_probs(fc, cm->tx_mode, &r); - - for (k = 0; k < SKIP_CONTEXTS; ++k) - vp9_diff_update_prob(&r, &fc->skip_probs[k]); - - if (!frame_is_intra_only(cm)) { - nmv_context *const nmvc = &fc->nmvc; - int i, j; - - read_inter_mode_probs(fc, &r); - - if (cm->interp_filter == SWITCHABLE) - read_switchable_interp_probs(fc, &r); - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); - - cm->reference_mode = read_frame_reference_mode(cm, &r); - if (cm->reference_mode != SINGLE_REFERENCE) - setup_compound_reference_mode(cm); - read_frame_reference_mode_probs(cm, &r); - - for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - for (i = 0; i < INTRA_MODES - 1; ++i) - vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); - - for (j = 0; j < PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); - - read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); - } - - return vpx_reader_has_error(&r); -} - -static struct vpx_read_bit_buffer *init_read_bit_buffer( - VP9Decoder *pbi, - struct vpx_read_bit_buffer *rb, - const uint8_t *data, - const uint8_t *data_end, - uint8_t clear_data[MAX_VP9_HEADER_SIZE]) { - rb->bit_offset = 0; - rb->error_handler = error_handler; - rb->error_handler_data = &pbi->common; - if (pbi->decrypt_cb) { - const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data); - pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); - rb->bit_buffer = clear_data; - rb->bit_buffer_end = clear_data + n; - } else { - rb->bit_buffer = data; - rb->bit_buffer_end = data_end; - } - return rb; -} - -//------------------------------------------------------------------------------ - -int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb) { - return vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && - vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && - vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; -} - -void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, - int *width, int *height) { - *width = vpx_rb_read_literal(rb, 16) + 1; - *height = vpx_rb_read_literal(rb, 16) + 1; -} - -BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb) { - int profile = vpx_rb_read_bit(rb); - profile |= vpx_rb_read_bit(rb) << 1; - if (profile > 2) - profile += vpx_rb_read_bit(rb); - return (BITSTREAM_PROFILE) profile; -} - -void vp9_decode_frame(VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - struct vpx_read_bit_buffer rb; - int context_updated = 0; - uint8_t clear_data[MAX_VP9_HEADER_SIZE]; - const size_t first_partition_size = read_uncompressed_header(pbi, - init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); - const int tile_rows = 1 << cm->log2_tile_rows; - const int tile_cols = 1 << cm->log2_tile_cols; - YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); - xd->cur_buf = new_fb; - - if (!first_partition_size) { - // showing a frame directly - *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2); - return; - } - - data += vpx_rb_bytes_read(&rb); - if (!read_is_valid(data, first_partition_size, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt header length"); - - cm->use_prev_frame_mvs = !cm->error_resilient_mode && - cm->width == cm->last_width && - cm->height == cm->last_height && - !cm->last_intra_only && - cm->last_show_frame && - (cm->last_frame_type != KEY_FRAME); - - vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); - - *cm->fc = cm->frame_contexts[cm->frame_context_idx]; - if (!cm->fc->initialized) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Uninitialized entropy context."); - - xd->corrupted = 0; - new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); - if (new_fb->corrupted) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data header is corrupted."); - - if (cm->lf.filter_level && !cm->skip_loop_filter) { - vp9_loop_filter_frame_init(cm, cm->lf.filter_level); - } - - // If encoded in frame parallel mode, frame context is ready after decoding - // the frame header. - if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) { - VPxWorker *const worker = pbi->frame_worker_owner; - FrameWorkerData *const frame_worker_data = worker->data1; - if (cm->refresh_frame_context) { - context_updated = 1; - cm->frame_contexts[cm->frame_context_idx] = *cm->fc; - } - vp9_frameworker_lock_stats(worker); - pbi->cur_buf->row = -1; - pbi->cur_buf->col = -1; - frame_worker_data->frame_context_ready = 1; - // Signal the main thread that context is ready. - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); - } - - if (pbi->tile_worker_data == NULL || - (tile_cols * tile_rows) != pbi->total_tiles) { - const int num_tile_workers = tile_cols * tile_rows + - ((pbi->max_threads > 1) ? pbi->max_threads : 0); - const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); - // Ensure tile data offsets will be properly aligned. This may fail on - // platforms without DECLARE_ALIGNED(). - assert((sizeof(*pbi->tile_worker_data) % 16) == 0); - vpx_free(pbi->tile_worker_data); - CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); - pbi->total_tiles = tile_rows * tile_cols; - } - - if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { - // Multi-threaded tile decoder - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - if (!xd->corrupted) { - if (!cm->skip_loop_filter) { - // If multiple threads are used to decode tiles, then we use those - // threads to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, - cm->lf.filter_level, 0, 0, pbi->tile_workers, - pbi->num_tile_workers, &pbi->lf_row_sync); - } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data is corrupted."); - } - } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); - } - - if (!xd->corrupted) { - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { - vp9_adapt_coef_probs(cm); - - if (!frame_is_intra_only(cm)) { - vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); - } - } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data is corrupted."); - } - - // Non frame parallel update frame context here. - if (cm->refresh_frame_context && !context_updated) - cm->frame_contexts[cm->frame_context_idx] = *cm->fc; -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h deleted file mode 100644 index ce33cbdbd9..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_DECODEFRAME_H_ -#define VP9_DECODER_VP9_DECODEFRAME_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "vp9/common/vp9_enums.h" - -struct VP9Decoder; -struct vpx_read_bit_buffer; - -int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb); -void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, - int *width, int *height); -BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb); - -void vp9_decode_frame(struct VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODEFRAME_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c deleted file mode 100644 index ffc6839ad1..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_seg_common.h" - -#include "vp9/decoder/vp9_decodemv.h" -#include "vp9/decoder/vp9_decodeframe.h" - -#include "vpx_dsp/vpx_dsp_common.h" - -static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { - return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p); -} - -static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int size_group) { - const PREDICTION_MODE y_mode = - read_intra_mode(r, cm->fc->y_mode_prob[size_group]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->y_mode[size_group][y_mode]; - return y_mode; -} - -static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, - PREDICTION_MODE y_mode) { - const PREDICTION_MODE uv_mode = read_intra_mode(r, - cm->fc->uv_mode_prob[y_mode]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->uv_mode[y_mode][uv_mode]; - return uv_mode; -} - -static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int ctx) { - const int mode = vpx_read_tree(r, vp9_inter_mode_tree, - cm->fc->inter_mode_probs[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->inter_mode[ctx][mode]; - - return NEARESTMV + mode; -} - -static int read_segment_id(vpx_reader *r, const struct segmentation *seg) { - return vpx_read_tree(r, vp9_segment_tree, seg->tree_probs); -} - -static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - TX_SIZE max_tx_size, vpx_reader *r) { - FRAME_COUNTS *counts = xd->counts; - const int ctx = get_tx_size_context(xd); - const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); - int tx_size = vpx_read(r, tx_probs[0]); - if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { - tx_size += vpx_read(r, tx_probs[1]); - if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) - tx_size += vpx_read(r, tx_probs[2]); - } - - if (counts) - ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; - return (TX_SIZE)tx_size; -} - -static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - int allow_select, vpx_reader *r) { - TX_MODE tx_mode = cm->tx_mode; - BLOCK_SIZE bsize = xd->mi[0]->sb_type; - const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; - if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) - return read_selected_tx_size(cm, xd, max_tx_size, r); - else - return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); -} - -static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, - int mi_offset, int x_mis, int y_mis) { - int x, y, segment_id = INT_MAX; - - for (y = 0; y < y_mis; y++) - for (x = 0; x < x_mis; x++) - segment_id = - VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - return segment_id; -} - -static void set_segment_id(VP9_COMMON *cm, int mi_offset, - int x_mis, int y_mis, int segment_id) { - int x, y; - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - - for (y = 0; y < y_mis; y++) - for (x = 0; x < x_mis; x++) - cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; -} - -static void copy_segment_id(const VP9_COMMON *cm, - const uint8_t *last_segment_ids, - uint8_t *current_segment_ids, - int mi_offset, int x_mis, int y_mis) { - int x, y; - - for (y = 0; y < y_mis; y++) - for (x = 0; x < x_mis; x++) - current_segment_ids[mi_offset + y * cm->mi_cols + x] = last_segment_ids ? - last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0; -} - -static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset, - int x_mis, int y_mis, - vpx_reader *r) { - struct segmentation *const seg = &cm->seg; - int segment_id; - - if (!seg->enabled) - return 0; // Default for disabled segmentation - - if (!seg->update_map) { - copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, - mi_offset, x_mis, y_mis); - return 0; - } - - segment_id = read_segment_id(r, seg); - set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); - return segment_id; -} - -static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - struct segmentation *const seg = &cm->seg; - MODE_INFO *const mi = xd->mi[0]; - int predicted_segment_id, segment_id; - const int mi_offset = mi_row * cm->mi_cols + mi_col; - - if (!seg->enabled) - return 0; // Default for disabled segmentation - - predicted_segment_id = cm->last_frame_seg_map ? - dec_get_segment_id(cm, cm->last_frame_seg_map, mi_offset, x_mis, y_mis) : - 0; - - if (!seg->update_map) { - copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, - mi_offset, x_mis, y_mis); - return predicted_segment_id; - } - - if (seg->temporal_update) { - const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); - mi->seg_id_predicted = vpx_read(r, pred_prob); - segment_id = mi->seg_id_predicted ? predicted_segment_id - : read_segment_id(r, seg); - } else { - segment_id = read_segment_id(r, seg); - } - set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); - return segment_id; -} - -static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, vpx_reader *r) { - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { - return 1; - } else { - const int ctx = vp9_get_skip_context(xd); - const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->skip[ctx][skip]; - return skip; - } -} - -static void read_intra_frame_mode_info(VP9_COMMON *const cm, - MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - MODE_INFO *const mi = xd->mi[0]; - const MODE_INFO *above_mi = xd->above_mi; - const MODE_INFO *left_mi = xd->left_mi; - const BLOCK_SIZE bsize = mi->sb_type; - int i; - const int mi_offset = mi_row * cm->mi_cols + mi_col; - - mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); - mi->skip = read_skip(cm, xd, mi->segment_id, r); - mi->tx_size = read_tx_size(cm, xd, 1, r); - mi->ref_frame[0] = INTRA_FRAME; - mi->ref_frame[1] = NONE; - - switch (bsize) { - case BLOCK_4X4: - for (i = 0; i < 4; ++i) - mi->bmi[i].as_mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); - mi->mode = mi->bmi[3].as_mode; - break; - case BLOCK_4X8: - mi->bmi[0].as_mode = mi->bmi[2].as_mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); - mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); - break; - case BLOCK_8X4: - mi->bmi[0].as_mode = mi->bmi[1].as_mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); - mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2)); - break; - default: - mi->mode = read_intra_mode(r, - get_y_mode_probs(mi, above_mi, left_mi, 0)); - } - - mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]); -} - -static int read_mv_component(vpx_reader *r, - const nmv_component *mvcomp, int usehp) { - int mag, d, fr, hp; - const int sign = vpx_read(r, mvcomp->sign); - const int mv_class = vpx_read_tree(r, vp9_mv_class_tree, mvcomp->classes); - const int class0 = mv_class == MV_CLASS_0; - - // Integer part - if (class0) { - d = vpx_read_tree(r, vp9_mv_class0_tree, mvcomp->class0); - mag = 0; - } else { - int i; - const int n = mv_class + CLASS0_BITS - 1; // number of bits - - d = 0; - for (i = 0; i < n; ++i) - d |= vpx_read(r, mvcomp->bits[i]) << i; - mag = CLASS0_SIZE << (mv_class + 2); - } - - // Fractional part - fr = vpx_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d] - : mvcomp->fp); - - // High precision part (if hp is not used, the default value of the hp is 1) - hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) - : 1; - - // Result - mag += ((d << 3) | (fr << 1) | hp) + 1; - return sign ? -mag : mag; -} - -static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref, - const nmv_context *ctx, - nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE joint_type = - (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints); - const int use_hp = allow_hp && use_mv_hp(ref); - MV diff = {0, 0}; - - if (mv_joint_vertical(joint_type)) - diff.row = read_mv_component(r, &ctx->comps[0], use_hp); - - if (mv_joint_horizontal(joint_type)) - diff.col = read_mv_component(r, &ctx->comps[1], use_hp); - - vp9_inc_mv(&diff, counts); - - mv->row = ref->row + diff.row; - mv->col = ref->col + diff.col; -} - -static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, - const MACROBLOCKD *xd, - vpx_reader *r) { - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - const int ctx = vp9_get_reference_mode_context(cm, xd); - const REFERENCE_MODE mode = - (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->comp_inter[ctx][mode]; - return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE - } else { - return cm->reference_mode; - } -} - -// Read the referncence frame -static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r, - int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { - FRAME_CONTEXT *const fc = cm->fc; - FRAME_COUNTS *counts = xd->counts; - - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id, - SEG_LVL_REF_FRAME); - ref_frame[1] = NONE; - } else { - const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); - // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding - if (mode == COMPOUND_REFERENCE) { - const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; - const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); - const int bit = vpx_read(r, fc->comp_ref_prob[ctx]); - if (counts) - ++counts->comp_ref[ctx][bit]; - ref_frame[idx] = cm->comp_fixed_ref; - ref_frame[!idx] = cm->comp_var_ref[bit]; - } else if (mode == SINGLE_REFERENCE) { - const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); - const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); - if (counts) - ++counts->single_ref[ctx0][0][bit0]; - if (bit0) { - const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); - const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); - if (counts) - ++counts->single_ref[ctx1][1][bit1]; - ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; - } else { - ref_frame[0] = LAST_FRAME; - } - - ref_frame[1] = NONE; - } else { - assert(0 && "Invalid prediction mode."); - } - } -} - -// TODO(slavarnway): Move this decoder version of -// vp9_get_pred_context_switchable_interp() to vp9_pred_common.h and update the -// encoder. -// -// Returns a context number for the given MB prediction signal -static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - const MODE_INFO *const left_mi = xd->left_mi; - const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; - const MODE_INFO *const above_mi = xd->above_mi; - const int above_type = above_mi ? above_mi->interp_filter - : SWITCHABLE_FILTERS; - - if (left_type == above_type) - return left_type; - else if (left_type == SWITCHABLE_FILTERS) - return above_type; - else if (above_type == SWITCHABLE_FILTERS) - return left_type; - else - return SWITCHABLE_FILTERS; -} - -static INLINE INTERP_FILTER read_switchable_interp_filter( - VP9_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r) { - const int ctx = dec_get_pred_context_switchable_interp(xd); - const INTERP_FILTER type = - (INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree, - cm->fc->switchable_interp_prob[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->switchable_interp[ctx][type]; - return type; -} - -static void read_intra_block_mode_info(VP9_COMMON *const cm, - MACROBLOCKD *const xd, MODE_INFO *mi, - vpx_reader *r) { - const BLOCK_SIZE bsize = mi->sb_type; - int i; - - switch (bsize) { - case BLOCK_4X4: - for (i = 0; i < 4; ++i) - mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); - mi->mode = mi->bmi[3].as_mode; - break; - case BLOCK_4X8: - mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, - r, 0); - mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode_y(cm, xd, r, 0); - break; - case BLOCK_8X4: - mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, - r, 0); - mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode_y(cm, xd, r, 0); - break; - default: - mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); - } - - mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode); - - // Initialize interp_filter here so we do not have to check for inter block - // modes in dec_get_pred_context_switchable_interp() - mi->interp_filter = SWITCHABLE_FILTERS; - - mi->ref_frame[0] = INTRA_FRAME; - mi->ref_frame[1] = NONE; -} - -static INLINE int is_mv_valid(const MV *mv) { - return mv->row > MV_LOW && mv->row < MV_UPP && - mv->col > MV_LOW && mv->col < MV_UPP; -} - -static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { - memcpy(dst, src, sizeof(*dst) * 2); -} - -static INLINE void zero_mv_pair(int_mv *dst) { - memset(dst, 0, sizeof(*dst) * 2); -} - -static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, - PREDICTION_MODE mode, - int_mv mv[2], int_mv ref_mv[2], - int_mv near_nearest_mv[2], - int is_compound, int allow_hp, vpx_reader *r) { - int i; - int ret = 1; - - switch (mode) { - case NEWMV: { - FRAME_COUNTS *counts = xd->counts; - nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL; - for (i = 0; i < 1 + is_compound; ++i) { - read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts, - allow_hp); - ret = ret && is_mv_valid(&mv[i].as_mv); - } - break; - } - case NEARMV: - case NEARESTMV: { - copy_mv_pair(mv, near_nearest_mv); - break; - } - case ZEROMV: { - zero_mv_pair(mv); - break; - } - default: { - return 0; - } - } - return ret; -} - -static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int segment_id, vpx_reader *r) { - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; - } else { - const int ctx = get_intra_inter_context(xd); - const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->intra_inter[ctx][is_inter]; - return is_inter; - } -} - -static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, - int refmv_count) { - int i; - - // Make sure all the candidates are properly clamped etc - for (i = 0; i < refmv_count; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp); - *best_mv = mvlist[i]; - } -} - -static void fpm_sync(void *const data, int mi_row) { - VP9Decoder *const pbi = (VP9Decoder *)data; - vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame, - mi_row << MI_BLOCK_SIZE_LOG2); -} - -// This macro is used to add a motion vector mv_ref list if it isn't -// already in the list. If it's the second motion vector or early_break -// it will also skip all additional processing and jump to Done! -#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \ - do { \ - if (refmv_count) { \ - if ((mv).as_int != (mv_ref_list)[0].as_int) { \ - (mv_ref_list)[(refmv_count)] = (mv); \ - refmv_count++; \ - goto Done; \ - } \ - } else { \ - (mv_ref_list)[(refmv_count)++] = (mv); \ - if (early_break) \ - goto Done; \ - } \ - } while (0) - -// If either reference frame is different, not INTRA, and they -// are different from each other scale and add the mv to our list. -#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \ - refmv_count, mv_ref_list, Done) \ - do { \ - if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - if (has_second_ref(mbmi) && \ - (mbmi)->ref_frame[1] != ref_frame && \ - (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ - ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - } \ - } while (0) - -// This function searches the neighborhood of a given MB/SB -// to try and find candidate reference vectors. -static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, - const POSITION *const mv_ref_search, - int_mv *mv_ref_list, - int mi_row, int mi_col, int block, int is_sub8x8, - find_mv_refs_sync sync, void *const data) { - const int *ref_sign_bias = cm->ref_frame_sign_bias; - int i, refmv_count = 0; - int different_ref_found = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? - cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; - const TileInfo *const tile = &xd->tile; - // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop - // searching after the first mv is found. - const int early_break = (mode != NEARMV); - - // Blank the reference vector list - memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); - - i = 0; - if (is_sub8x8) { - // If the size < 8x8 we get the mv from the bmi substructure for the - // nearest two blocks. - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - different_ref_found = 1; - - if (candidate_mi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST_EB( - get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - else if (candidate_mi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST_EB( - get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - } - } - } - - // Check the rest of the neighbors in much the same way - // as before except we don't need to keep track of sub blocks or - // mode counts. - for (; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - different_ref_found = 1; - - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done); - else if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast - // on windows platform. The sync here is unnecessary if use_prev_frame_mvs - // is 0. But after removing it, there will be hang in the unit test on windows - // due to several threads waiting for a thread's signal. -#if defined(_WIN32) && !HAVE_PTHREAD_H - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } -#endif - - // Check the last frame's mode and mv info. - if (prev_frame_mvs) { - // Synchronize here for frame parallel decode if sync function is provided. - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } - - if (prev_frame_mvs->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); - } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { - ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // Since we couldn't find 2 mvs from the same reference frame - // go back through the neighbors and find motion vectors from - // different reference frames. - if (different_ref_found) { - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - - // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias, - refmv_count, mv_ref_list, Done); - } - } - } - - // Since we still don't have a candidate we'll try the last frame. - if (prev_frame_mvs) { - if (prev_frame_mvs->ref_frame[0] != ref_frame && - prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { - int_mv mv = prev_frame_mvs->mv[0]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); - } - - if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && - prev_frame_mvs->ref_frame[1] != ref_frame && - prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { - int_mv mv = prev_frame_mvs->mv[1]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); - } - } - - if (mode == NEARMV) - refmv_count = MAX_MV_REF_CANDIDATES; - else - // we only care about the nearestmv for the remaining modes - refmv_count = 1; - - Done: - // Clamp vectors - for (i = 0; i < refmv_count; ++i) - clamp_mv_ref(&mv_ref_list[i].as_mv, xd); - - return refmv_count; -} - -static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, - const POSITION *const mv_ref_search, - PREDICTION_MODE b_mode, int block, - int ref, int mi_row, int mi_col, - int_mv *best_sub8x8) { - int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi[0]; - b_mode_info *bmi = mi->bmi; - int n; - int refmv_count; - - assert(MAX_MV_REF_CANDIDATES == 2); - - refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], - mv_ref_search, mv_list, mi_row, mi_col, block, - 1, NULL, NULL); - - switch (block) { - case 0: - best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; - break; - case 1: - case 2: - if (b_mode == NEARESTMV) { - best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; - } else { - best_sub8x8->as_int = 0; - for (n = 0; n < refmv_count; ++n) - if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { - best_sub8x8->as_int = mv_list[n].as_int; - break; - } - } - break; - case 3: - if (b_mode == NEARESTMV) { - best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; - } else { - int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; - candidates[0] = bmi[1].as_mv[ref]; - candidates[1] = bmi[0].as_mv[ref]; - candidates[2] = mv_list[0]; - candidates[3] = mv_list[1]; - best_sub8x8->as_int = 0; - for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) - if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { - best_sub8x8->as_int = candidates[n].as_int; - break; - } - } - break; - default: - assert(0 && "Invalid block index."); - } -} - -static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd, - const POSITION *const mv_ref_search, - int mi_row, int mi_col) { - int i; - int context_counter = 0; - const TileInfo *const tile = &xd->tile; - - // Get mode count from nearest 2 blocks - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - // Keep counts for entropy encoding. - context_counter += mode_2_counter[candidate->mode]; - } - } - - return counter_to_context[context_counter]; -} - -static void read_inter_block_mode_info(VP9Decoder *const pbi, - MACROBLOCKD *const xd, - MODE_INFO *const mi, - int mi_row, int mi_col, vpx_reader *r) { - VP9_COMMON *const cm = &pbi->common; - const BLOCK_SIZE bsize = mi->sb_type; - const int allow_hp = cm->allow_high_precision_mv; - int_mv best_ref_mvs[2]; - int ref, is_compound; - uint8_t inter_mode_ctx; - const POSITION *const mv_ref_search = mv_ref_blocks[bsize]; - - read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame); - is_compound = has_second_ref(mi); - inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col); - - if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { - mi->mode = ZEROMV; - if (bsize < BLOCK_8X8) { - vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid usage of segement feature on small blocks"); - return; - } - } else { - if (bsize >= BLOCK_8X8) - mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); - else - // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV. - // Setting mode to NEARESTMV forces the search to stop after the nearestmv - // has been found. After b_modes have been read, mode will be overwritten - // by the last b_mode. - mi->mode = NEARESTMV; - - if (mi->mode != ZEROMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; - const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; - int refmv_count; - - refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, - tmp_mvs, mi_row, mi_col, -1, 0, - fpm_sync, (void *)pbi); - - dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], - refmv_count); - } - } - } - - mi->interp_filter = (cm->interp_filter == SWITCHABLE) - ? read_switchable_interp_filter(cm, xd, r) - : cm->interp_filter; - - if (bsize < BLOCK_8X8) { - const int num_4x4_w = 1 << xd->bmode_blocks_wl; - const int num_4x4_h = 1 << xd->bmode_blocks_hl; - int idx, idy; - PREDICTION_MODE b_mode; - int_mv best_sub8x8[2]; - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int j = idy * 2 + idx; - b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); - - if (b_mode == NEARESTMV || b_mode == NEARMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) - append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, - mi_row, mi_col, &best_sub8x8[ref]); - } - - if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, - best_sub8x8, is_compound, allow_hp, r)) { - xd->corrupted |= 1; - break; - } - - if (num_4x4_h == 2) - mi->bmi[j + 2] = mi->bmi[j]; - if (num_4x4_w == 2) - mi->bmi[j + 1] = mi->bmi[j]; - } - } - - mi->mode = b_mode; - - copy_mv_pair(mi->mv, mi->bmi[3].as_mv); - } else { - xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, - best_ref_mvs, is_compound, allow_hp, r); - } -} - -static void read_inter_frame_mode_info(VP9Decoder *const pbi, - MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - VP9_COMMON *const cm = &pbi->common; - MODE_INFO *const mi = xd->mi[0]; - int inter_block; - - mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis, - y_mis); - mi->skip = read_skip(cm, xd, mi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); - mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r); - - if (inter_block) - read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); - else - read_intra_block_mode_info(cm, xd, mi, r); -} - -static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, - const MV_REFERENCE_FRAME *src) { - memcpy(dst, src, sizeof(*dst) * 2); -} - -void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - VP9_COMMON *const cm = &pbi->common; - MODE_INFO *const mi = xd->mi[0]; - MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; - int w, h; - - if (frame_is_intra_only(cm)) { - read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis); - } else { - read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - - for (h = 0; h < y_mis; ++h) { - for (w = 0; w < x_mis; ++w) { - MV_REF *const mv = frame_mvs + w; - copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); - copy_mv_pair(mv->mv, mi->mv); - } - frame_mvs += cm->mi_cols; - } - } -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH - if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && - (xd->above_mi == NULL || xd->left_mi == NULL) && - !is_inter_block(mi) && need_top_left[mi->uv_mode]) - assert(0); -#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h deleted file mode 100644 index 45569ec81f..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_DECODEMV_H_ -#define VP9_DECODER_VP9_DECODEMV_H_ - -#include "vpx_dsp/bitreader.h" - -#include "vp9/decoder/vp9_decoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.c b/thirdparty/libvpx/vp9/decoder/vp9_decoder.c deleted file mode 100644 index 935c04f3aa..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decoder.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include - -#include "./vp9_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/system_state.h" -#include "vpx_ports/vpx_once.h" -#include "vpx_ports/vpx_timer.h" -#include "vpx_scale/vpx_scale.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" -#if CONFIG_VP9_POSTPROC -#include "vp9/common/vp9_postproc.h" -#endif -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconintra.h" - -#include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/decoder/vp9_detokenize.h" - -static void initialize_dec(void) { - static volatile int init_done = 0; - - if (!init_done) { - vp9_rtcd(); - vpx_dsp_rtcd(); - vpx_scale_rtcd(); - vp9_init_intra_predictors(); - init_done = 1; - } -} - -static void vp9_dec_setup_mi(VP9_COMMON *cm) { - cm->mi = cm->mip + cm->mi_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; - memset(cm->mi_grid_base, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); -} - -static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { - cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); - if (!cm->mip) - return 1; - cm->mi_alloc_size = mi_size; - cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); - if (!cm->mi_grid_base) - return 1; - return 0; -} - -static void vp9_dec_free_mi(VP9_COMMON *cm) { - vpx_free(cm->mip); - cm->mip = NULL; - vpx_free(cm->mi_grid_base); - cm->mi_grid_base = NULL; -} - -VP9Decoder *vp9_decoder_create(BufferPool *const pool) { - VP9Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi)); - VP9_COMMON *volatile const cm = pbi ? &pbi->common : NULL; - - if (!cm) - return NULL; - - vp9_zero(*pbi); - - if (setjmp(cm->error.jmp)) { - cm->error.setjmp = 0; - vp9_decoder_remove(pbi); - return NULL; - } - - cm->error.setjmp = 1; - - CHECK_MEM_ERROR(cm, cm->fc, - (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc))); - CHECK_MEM_ERROR(cm, cm->frame_contexts, - (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, - sizeof(*cm->frame_contexts))); - - pbi->need_resync = 1; - once(initialize_dec); - - // Initialize the references to not point to any frame buffers. - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); - - cm->current_video_frame = 0; - pbi->ready_for_new_data = 1; - pbi->common.buffer_pool = pool; - - cm->bit_depth = VPX_BITS_8; - cm->dequant_bit_depth = VPX_BITS_8; - - cm->alloc_mi = vp9_dec_alloc_mi; - cm->free_mi = vp9_dec_free_mi; - cm->setup_mi = vp9_dec_setup_mi; - - vp9_loop_filter_init(cm); - - cm->error.setjmp = 0; - - vpx_get_worker_interface()->init(&pbi->lf_worker); - - return pbi; -} - -void vp9_decoder_remove(VP9Decoder *pbi) { - int i; - - if (!pbi) - return; - - vpx_get_worker_interface()->end(&pbi->lf_worker); - vpx_free(pbi->lf_worker.data1); - - for (i = 0; i < pbi->num_tile_workers; ++i) { - VPxWorker *const worker = &pbi->tile_workers[i]; - vpx_get_worker_interface()->end(worker); - } - - vpx_free(pbi->tile_worker_data); - vpx_free(pbi->tile_workers); - - if (pbi->num_tile_workers > 0) { - vp9_loop_filter_dealloc(&pbi->lf_row_sync); - } - - vpx_free(pbi); -} - -static int equal_dimensions(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b) { - return a->y_height == b->y_height && a->y_width == b->y_width && - a->uv_height == b->uv_height && a->uv_width == b->uv_width; -} - -vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - VP9_COMMON *cm = &pbi->common; - - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - * encoder is using the frame buffers for. This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. - */ - if (ref_frame_flag == VP9_LAST_FLAG) { - const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0); - if (cfg == NULL) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "No 'last' reference frame"); - return VPX_CODEC_ERROR; - } - if (!equal_dimensions(cfg, sd)) - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - else - vp8_yv12_copy_frame(cfg, sd); - } else { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Invalid reference frame"); - } - - return cm->error.error_code; -} - - -vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - RefBuffer *ref_buf = NULL; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - - // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - // encoder is using the frame buffers for. This is just a stub to keep the - // vpxenc --test-decode functionality working, and will be replaced in a - // later commit that adds VP9-specific controls for this functionality. - if (ref_frame_flag == VP9_LAST_FLAG) { - ref_buf = &cm->frame_refs[0]; - } else if (ref_frame_flag == VP9_GOLD_FLAG) { - ref_buf = &cm->frame_refs[1]; - } else if (ref_frame_flag == VP9_ALT_FLAG) { - ref_buf = &cm->frame_refs[2]; - } else { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return cm->error.error_code; - } - - if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } else { - int *ref_fb_ptr = &ref_buf->idx; - - // Find an empty frame buffer. - const int free_fb = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Unable to find free frame buffer"); - return cm->error.error_code; - } - - // Decrease ref_count since it will be increased again in - // ref_cnt_fb() below. - --frame_bufs[free_fb].ref_count; - - // Manage the reference counters and copy image. - ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb); - ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf; - vp8_yv12_copy_frame(sd, ref_buf->buf); - } - - return cm->error.error_code; -} - -/* If any buffer updating is signaled it should be done here. */ -static void swap_frame_buffers(VP9Decoder *pbi) { - int ref_index = 0, mask; - VP9_COMMON *const cm = &pbi->common; - BufferPool *const pool = cm->buffer_pool; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - - lock_buffer_pool(pool); - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - // Current thread releases the holding of reference frame. - decrease_ref_count(old_idx, frame_bufs, pool); - - // Release the reference frame in reference map. - if (mask & 1) { - decrease_ref_count(old_idx, frame_bufs, pool); - } - cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; - ++ref_index; - } - - // Current thread releases the holding of reference frame. - for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { - const int old_idx = cm->ref_frame_map[ref_index]; - decrease_ref_count(old_idx, frame_bufs, pool); - cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; - } - unlock_buffer_pool(pool); - pbi->hold_ref_buf = 0; - cm->frame_to_show = get_frame_new_buffer(cm); - - if (!pbi->frame_parallel_decode || !cm->show_frame) { - lock_buffer_pool(pool); - --frame_bufs[cm->new_fb_idx].ref_count; - unlock_buffer_pool(pool); - } - - // Invalidate these references until the next frame starts. - for (ref_index = 0; ref_index < 3; ref_index++) - cm->frame_refs[ref_index].idx = -1; -} - -int vp9_receive_compressed_data(VP9Decoder *pbi, - size_t size, const uint8_t **psource) { - VP9_COMMON *volatile const cm = &pbi->common; - BufferPool *volatile const pool = cm->buffer_pool; - RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; - const uint8_t *source = *psource; - int retcode = 0; - cm->error.error_code = VPX_CODEC_OK; - - if (size == 0) { - // This is used to signal that we are missing frames. - // We do not know if the missing frame(s) was supposed to update - // any of the reference buffers, but we act conservative and - // mark only the last buffer as corrupted. - // - // TODO(jkoleszar): Error concealment is undefined and non-normative - // at this point, but if it becomes so, [0] may not always be the correct - // thing to do here. - if (cm->frame_refs[0].idx > 0) { - assert(cm->frame_refs[0].buf != NULL); - cm->frame_refs[0].buf->corrupted = 1; - } - } - - pbi->ready_for_new_data = 0; - - // Check if the previous frame was a frame without any references to it. - // Release frame buffer if not decoding in frame parallel mode. - if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 - && frame_bufs[cm->new_fb_idx].ref_count == 0) - pool->release_fb_cb(pool->cb_priv, - &frame_bufs[cm->new_fb_idx].raw_frame_buffer); - // Find a free frame buffer. Return error if can not find any. - cm->new_fb_idx = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Unable to find free frame buffer"); - return cm->error.error_code; - } - - // Assign a MV array to the frame buffer. - cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; - - pbi->hold_ref_buf = 0; - if (pbi->frame_parallel_decode) { - VPxWorker *const worker = pbi->frame_worker_owner; - vp9_frameworker_lock_stats(worker); - frame_bufs[cm->new_fb_idx].frame_worker_owner = worker; - // Reset decoding progress. - pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; - pbi->cur_buf->row = -1; - pbi->cur_buf->col = -1; - vp9_frameworker_unlock_stats(worker); - } else { - pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; - } - - - if (setjmp(cm->error.jmp)) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - int i; - - cm->error.setjmp = 0; - pbi->ready_for_new_data = 1; - - // Synchronize all threads immediately as a subsequent decode call may - // cause a resize invalidating some allocations. - winterface->sync(&pbi->lf_worker); - for (i = 0; i < pbi->num_tile_workers; ++i) { - winterface->sync(&pbi->tile_workers[i]); - } - - lock_buffer_pool(pool); - // Release all the reference buffers if worker thread is holding them. - if (pbi->hold_ref_buf == 1) { - int ref_index = 0, mask; - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - // Current thread releases the holding of reference frame. - decrease_ref_count(old_idx, frame_bufs, pool); - - // Release the reference frame in reference map. - if (mask & 1) { - decrease_ref_count(old_idx, frame_bufs, pool); - } - ++ref_index; - } - - // Current thread releases the holding of reference frame. - for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { - const int old_idx = cm->ref_frame_map[ref_index]; - decrease_ref_count(old_idx, frame_bufs, pool); - } - pbi->hold_ref_buf = 0; - } - // Release current frame. - decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); - unlock_buffer_pool(pool); - - vpx_clear_system_state(); - return -1; - } - - cm->error.setjmp = 1; - vp9_decode_frame(pbi, source, source + size, psource); - - swap_frame_buffers(pbi); - - vpx_clear_system_state(); - - if (!cm->show_existing_frame) { - cm->last_show_frame = cm->show_frame; - cm->prev_frame = cm->cur_frame; - if (cm->seg.enabled && !pbi->frame_parallel_decode) - vp9_swap_current_and_last_seg_map(cm); - } - - // Update progress in frame parallel decode. - if (pbi->frame_parallel_decode) { - // Need to lock the mutex here as another thread may - // be accessing this buffer. - VPxWorker *const worker = pbi->frame_worker_owner; - FrameWorkerData *const frame_worker_data = worker->data1; - vp9_frameworker_lock_stats(worker); - - if (cm->show_frame) { - cm->current_video_frame++; - } - frame_worker_data->frame_decoded = 1; - frame_worker_data->frame_context_ready = 1; - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); - } else { - cm->last_width = cm->width; - cm->last_height = cm->height; - if (cm->show_frame) { - cm->current_video_frame++; - } - } - - cm->error.setjmp = 0; - return retcode; -} - -int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp9_ppflags_t *flags) { - VP9_COMMON *const cm = &pbi->common; - int ret = -1; -#if !CONFIG_VP9_POSTPROC - (void)*flags; -#endif - - if (pbi->ready_for_new_data == 1) - return ret; - - pbi->ready_for_new_data = 1; - - /* no raw frame to show!!! */ - if (!cm->show_frame) - return ret; - - pbi->ready_for_new_data = 1; - -#if CONFIG_VP9_POSTPROC - if (!cm->show_existing_frame) { - ret = vp9_post_proc_frame(cm, sd, flags); - } else { - *sd = *cm->frame_to_show; - ret = 0; - } -#else - *sd = *cm->frame_to_show; - ret = 0; -#endif /*!CONFIG_POSTPROC*/ - vpx_clear_system_state(); - return ret; -} - -vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // A chunk ending with a byte matching 0xc0 is an invalid chunk unless - // it is a super frame index. If the last byte of real video compression - // data is 0xc0 the encoder must add a 0 byte. If we have the marker but - // not the associated matching marker byte at the front of the index we have - // an invalid bitstream and need to return an error. - - uint8_t marker; - - assert(data_sz); - marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); - *count = 0; - - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frames; - - // This chunk is marked as having a superframe index but doesn't have - // enough data for it, thus it's an invalid superframe index. - if (data_sz < index_sz) - return VPX_CODEC_CORRUPT_FRAME; - - { - const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, - data + data_sz - index_sz); - - // This chunk is marked as having a superframe index but doesn't have - // the matching marker byte at the front of the index therefore it's an - // invalid chunk. - if (marker != marker2) - return VPX_CODEC_CORRUPT_FRAME; - } - - { - // Found a valid superframe index. - uint32_t i, j; - const uint8_t *x = &data[data_sz - index_sz + 1]; - - // Frames has a maximum of 8 and mag has a maximum of 4. - uint8_t clear_buffer[32]; - assert(sizeof(clear_buffer) >= frames * mag); - if (decrypt_cb) { - decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); - x = clear_buffer; - } - - for (i = 0; i < frames; ++i) { - uint32_t this_sz = 0; - - for (j = 0; j < mag; ++j) - this_sz |= ((uint32_t)(*x++)) << (j * 8); - sizes[i] = this_sz; - } - *count = frames; - } - } - return VPX_CODEC_OK; -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.h b/thirdparty/libvpx/vp9/decoder/vp9_decoder.h deleted file mode 100644 index 7111a36d37..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decoder.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_DECODER_H_ -#define VP9_DECODER_VP9_DECODER_H_ - -#include "./vpx_config.h" - -#include "vpx/vpx_codec.h" -#include "vpx_dsp/bitreader.h" -#include "vpx_scale/yv12config.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_thread_common.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_ppflags.h" -#include "vp9/decoder/vp9_dthread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct TileBuffer { - const uint8_t *data; - size_t size; - int col; // only used with multi-threaded decoding -} TileBuffer; - -typedef struct TileWorkerData { - const uint8_t *data_end; - int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive - vpx_reader bit_reader; - FRAME_COUNTS counts; - DECLARE_ALIGNED(16, MACROBLOCKD, xd); - /* dqcoeff are shared by all the planes. So planes must be decoded serially */ - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); - struct vpx_internal_error_info error_info; -} TileWorkerData; - -typedef struct VP9Decoder { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - DECLARE_ALIGNED(16, VP9_COMMON, common); - - int ready_for_new_data; - - int refresh_frame_flags; - - int frame_parallel_decode; // frame-based threading. - - // TODO(hkuang): Combine this with cur_buf in macroblockd as they are - // the same. - RefCntBuffer *cur_buf; // Current decoding frame buffer. - - VPxWorker *frame_worker_owner; // frame_worker that owns this pbi. - VPxWorker lf_worker; - VPxWorker *tile_workers; - TileWorkerData *tile_worker_data; - TileBuffer tile_buffers[64]; - int num_tile_workers; - int total_tiles; - - VP9LfSync lf_row_sync; - - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - - int max_threads; - int inv_tile_order; - int need_resync; // wait for key/intra-only frame. - int hold_ref_buf; // hold the reference buffer. -} VP9Decoder; - -int vp9_receive_compressed_data(struct VP9Decoder *pbi, - size_t size, const uint8_t **dest); - -int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp9_ppflags_t *flags); - -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, - void *decrypt_state, - const uint8_t *data) { - if (decrypt_cb) { - uint8_t marker; - decrypt_cb(decrypt_state, data, &marker, 1); - return marker; - } - return *data; -} - -// This function is exposed for use in tests, as well as the inlined function -// "read_marker". -vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); - -void vp9_decoder_remove(struct VP9Decoder *pbi); - -static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, - BufferPool *const pool) { - if (idx >= 0 && frame_bufs[idx].ref_count > 0) { - --frame_bufs[idx].ref_count; - // A worker may only get a free framebuffer index when calling get_free_fb. - // But the private buffer is not set up until finish decoding header. - // So any error happens during decoding header, the frame_bufs will not - // have valid priv buffer. - if (frame_bufs[idx].ref_count == 0 && - frame_bufs[idx].raw_frame_buffer.priv) { - pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); - } - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c deleted file mode 100644 index 47dc107fe2..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#if CONFIG_COEFFICIENT_RANGE_CHECKING -#include "vp9/common/vp9_idct.h" -#endif - -#include "vp9/decoder/vp9_detokenize.h" - -#define EOB_CONTEXT_NODE 0 -#define ZERO_CONTEXT_NODE 1 -#define ONE_CONTEXT_NODE 2 - -#define INCREMENT_COUNT(token) \ - do { \ - if (counts) \ - ++coef_counts[band][ctx][token]; \ - } while (0) - -static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { - int i, val = 0; - for (i = 0; i < n; ++i) - val = (val << 1) | vpx_read(r, probs[i]); - return val; -} - -static int decode_coefs(const MACROBLOCKD *xd, - PLANE_TYPE type, - tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, - int ctx, const int16_t *scan, const int16_t *nb, - vpx_reader *r) { - FRAME_COUNTS *counts = xd->counts; - const int max_eob = 16 << (tx_size << 1); - const FRAME_CONTEXT *const fc = xd->fc; - const int ref = is_inter_block(xd->mi[0]); - int band, c = 0; - const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = - fc->coef_probs[tx_size][type][ref]; - const vpx_prob *prob; - unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; - unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; - uint8_t token_cache[32 * 32]; - const uint8_t *band_translate = get_band_translate(tx_size); - const int dq_shift = (tx_size == TX_32X32); - int v, token; - int16_t dqv = dq[0]; - const uint8_t *const cat6_prob = -#if CONFIG_VP9_HIGHBITDEPTH - (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 : - (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : -#endif // CONFIG_VP9_HIGHBITDEPTH - vp9_cat6_prob; - const int cat6_bits = -#if CONFIG_VP9_HIGHBITDEPTH - (xd->bd == VPX_BITS_12) ? 18 : - (xd->bd == VPX_BITS_10) ? 16 : -#endif // CONFIG_VP9_HIGHBITDEPTH - 14; - - if (counts) { - coef_counts = counts->coef[tx_size][type][ref]; - eob_branch_count = counts->eob_branch[tx_size][type][ref]; - } - - while (c < max_eob) { - int val = -1; - band = *band_translate++; - prob = coef_probs[band][ctx]; - if (counts) - ++eob_branch_count[band][ctx]; - if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) { - INCREMENT_COUNT(EOB_MODEL_TOKEN); - break; - } - - while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) { - INCREMENT_COUNT(ZERO_TOKEN); - dqv = dq[1]; - token_cache[scan[c]] = 0; - ++c; - if (c >= max_eob) - return c; // zero tokens at the end (no eob token) - ctx = get_coef_context(nb, token_cache, c); - band = *band_translate++; - prob = coef_probs[band][ctx]; - } - - if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) { - INCREMENT_COUNT(ONE_TOKEN); - token = ONE_TOKEN; - val = 1; - } else { - INCREMENT_COUNT(TWO_TOKEN); - token = vpx_read_tree(r, vp9_coef_con_tree, - vp9_pareto8_full[prob[PIVOT_NODE] - 1]); - switch (token) { - case TWO_TOKEN: - case THREE_TOKEN: - case FOUR_TOKEN: - val = token; - break; - case CATEGORY1_TOKEN: - val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r); - break; - case CATEGORY2_TOKEN: - val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r); - break; - case CATEGORY3_TOKEN: - val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r); - break; - case CATEGORY4_TOKEN: - val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r); - break; - case CATEGORY5_TOKEN: - val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r); - break; - case CATEGORY6_TOKEN: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r); - break; - } - } - v = (val * dqv) >> dq_shift; -#if CONFIG_COEFFICIENT_RANGE_CHECKING -#if CONFIG_VP9_HIGHBITDEPTH - dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v), - xd->bd); -#else - dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v); -#endif // CONFIG_VP9_HIGHBITDEPTH -#else - dqcoeff[scan[c]] = vpx_read_bit(r) ? -v : v; -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING - token_cache[scan[c]] = vp9_pt_energy_class[token]; - ++c; - ctx = get_coef_context(nb, token_cache, c); - dqv = dq[1]; - } - - return c; -} - -static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, - int x, int y, unsigned int tx_size_in_blocks) { - if (xd->max_blocks_wide) { - if (tx_size_in_blocks + x > xd->max_blocks_wide) - *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; - } - if (xd->max_blocks_high) { - if (tx_size_in_blocks + y > xd->max_blocks_high) - *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; - } -} - -int vp9_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, - int x, int y, TX_SIZE tx_size, vpx_reader *r, - int seg_id) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int16_t *const dequant = pd->seg_dequant[seg_id]; - int eob; - ENTROPY_CONTEXT *a = pd->above_context + x; - ENTROPY_CONTEXT *l = pd->left_context + y; - int ctx; - int ctx_shift_a = 0; - int ctx_shift_l = 0; - - switch (tx_size) { - case TX_4X4: - ctx = a[0] != 0; - ctx += l[0] != 0; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - a[0] = l[0] = (eob > 0); - break; - case TX_8X8: - get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); - ctx = !!*(const uint16_t *)a; - ctx += !!*(const uint16_t *)l; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; - *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; - break; - case TX_16X16: - get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); - ctx = !!*(const uint32_t *)a; - ctx += !!*(const uint32_t *)l; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; - *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; - break; - case TX_32X32: - get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); - // NOTE: casting to uint64_t here is safe because the default memory - // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte - // boundaries. - ctx = !!*(const uint64_t *)a; - ctx += !!*(const uint64_t *)l; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a; - *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l; - break; - default: - assert(0 && "Invalid transform size."); - eob = 0; - break; - } - - return eob; -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h deleted file mode 100644 index d242d4466e..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_DETOKENIZE_H_ -#define VP9_DECODER_VP9_DETOKENIZE_H_ - -#include "vpx_dsp/bitreader.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/common/vp9_scan.h" - -#ifdef __cplusplus -extern "C" { -#endif - -int vp9_decode_block_tokens(MACROBLOCKD *xd, - int plane, const scan_order *sc, - int x, int y, - TX_SIZE tx_size, vpx_reader *r, - int seg_id); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c deleted file mode 100644 index 05b38538ae..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vp9/common/vp9_entropy.h" - -#include "vp9/decoder/vp9_dsubexp.h" - -static int inv_recenter_nonneg(int v, int m) { - if (v > 2 * m) - return v; - - return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1); -} - -static int decode_uniform(vpx_reader *r) { - const int l = 8; - const int m = (1 << l) - 191; - const int v = vpx_read_literal(r, l - 1); - return v < m ? v : (v << 1) - m + vpx_read_bit(r); -} - -static int inv_remap_prob(int v, int m) { - static uint8_t inv_map_table[MAX_PROB] = { - 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189, - 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, - 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76, - 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, - 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, - 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, - 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, - 142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, - 174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, - 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, - 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222, - 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, - 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253 - }; - assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0]))); - v = inv_map_table[v]; - m--; - if ((m << 1) <= MAX_PROB) { - return 1 + inv_recenter_nonneg(v, m); - } else { - return MAX_PROB - inv_recenter_nonneg(v, MAX_PROB - 1 - m); - } -} - -static int decode_term_subexp(vpx_reader *r) { - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 4); - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 4) + 16; - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 5) + 32; - return decode_uniform(r) + 64; -} - -void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p) { - if (vpx_read(r, DIFF_UPDATE_PROB)) { - const int delp = decode_term_subexp(r); - *p = (vpx_prob)inv_remap_prob(delp, *p); - } -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h deleted file mode 100644 index a8bcc70be9..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_DSUBEXP_H_ -#define VP9_DECODER_VP9_DSUBEXP_H_ - -#include "vpx_dsp/bitreader.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DSUBEXP_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.c b/thirdparty/libvpx/vp9/decoder/vp9_dthread.c deleted file mode 100644 index 14a71448fe..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dthread.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_decoder.h" - -// #define DEBUG_THREAD - -// TODO(hkuang): Clean up all the #ifdef in this file. -void vp9_frameworker_lock_stats(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const worker_data = worker->data1; - pthread_mutex_lock(&worker_data->stats_mutex); -#else - (void)worker; -#endif -} - -void vp9_frameworker_unlock_stats(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const worker_data = worker->data1; - pthread_mutex_unlock(&worker_data->stats_mutex); -#else - (void)worker; -#endif -} - -void vp9_frameworker_signal_stats(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const worker_data = worker->data1; - -// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper. -#if defined(_WIN32) && !HAVE_PTHREAD_H - pthread_cond_signal(&worker_data->stats_cond); -#else - pthread_cond_broadcast(&worker_data->stats_cond); -#endif - -#else - (void)worker; -#endif -} - -// This macro prevents thread_sanitizer from reporting known concurrent writes. -#if defined(__has_feature) -#if __has_feature(thread_sanitizer) -#define BUILDING_WITH_TSAN -#endif -#endif - -// TODO(hkuang): Remove worker parameter as it is only used in debug code. -void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, - int row) { -#if CONFIG_MULTITHREAD - if (!ref_buf) - return; - -#ifndef BUILDING_WITH_TSAN - // The following line of code will get harmless tsan error but it is the key - // to get best performance. - if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return; -#endif - - { - // Find the worker thread that owns the reference frame. If the reference - // frame has been fully decoded, it may not have owner. - VPxWorker *const ref_worker = ref_buf->frame_worker_owner; - FrameWorkerData *const ref_worker_data = - (FrameWorkerData *)ref_worker->data1; - const VP9Decoder *const pbi = ref_worker_data->pbi; - -#ifdef DEBUG_THREAD - { - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n", - worker_data->worker_id, worker, ref_worker_data->worker_id, - ref_buf->frame_worker_owner, row, ref_buf->row); - } -#endif - - vp9_frameworker_lock_stats(ref_worker); - while (ref_buf->row < row && pbi->cur_buf == ref_buf && - ref_buf->buf.corrupted != 1) { - pthread_cond_wait(&ref_worker_data->stats_cond, - &ref_worker_data->stats_mutex); - } - - if (ref_buf->buf.corrupted == 1) { - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - vp9_frameworker_unlock_stats(ref_worker); - vpx_internal_error(&worker_data->pbi->common.error, - VPX_CODEC_CORRUPT_FRAME, - "Worker %p failed to decode frame", worker); - } - vp9_frameworker_unlock_stats(ref_worker); - } -#else - (void)worker; - (void)ref_buf; - (void)row; - (void)ref_buf; -#endif // CONFIG_MULTITHREAD -} - -void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) { -#if CONFIG_MULTITHREAD - VPxWorker *worker = buf->frame_worker_owner; - -#ifdef DEBUG_THREAD - { - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id, - buf->frame_worker_owner, row); - } -#endif - - vp9_frameworker_lock_stats(worker); - buf->row = row; - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); -#else - (void)buf; - (void)row; -#endif // CONFIG_MULTITHREAD -} - -void vp9_frameworker_copy_context(VPxWorker *const dst_worker, - VPxWorker *const src_worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1; - FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1; - VP9_COMMON *const src_cm = &src_worker_data->pbi->common; - VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common; - int i; - - // Wait until source frame's context is ready. - vp9_frameworker_lock_stats(src_worker); - while (!src_worker_data->frame_context_ready) { - pthread_cond_wait(&src_worker_data->stats_cond, - &src_worker_data->stats_mutex); - } - - dst_cm->last_frame_seg_map = src_cm->seg.enabled ? - src_cm->current_frame_seg_map : src_cm->last_frame_seg_map; - dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; - vp9_frameworker_unlock_stats(src_worker); - - dst_cm->bit_depth = src_cm->bit_depth; -#if CONFIG_VP9_HIGHBITDEPTH - dst_cm->use_highbitdepth = src_cm->use_highbitdepth; -#endif - dst_cm->prev_frame = src_cm->show_existing_frame ? - src_cm->prev_frame : src_cm->cur_frame; - dst_cm->last_width = !src_cm->show_existing_frame ? - src_cm->width : src_cm->last_width; - dst_cm->last_height = !src_cm->show_existing_frame ? - src_cm->height : src_cm->last_height; - dst_cm->subsampling_x = src_cm->subsampling_x; - dst_cm->subsampling_y = src_cm->subsampling_y; - dst_cm->frame_type = src_cm->frame_type; - dst_cm->last_show_frame = !src_cm->show_existing_frame ? - src_cm->show_frame : src_cm->last_show_frame; - for (i = 0; i < REF_FRAMES; ++i) - dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i]; - - memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr, - (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh)); - dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level; - dst_cm->lf.filter_level = src_cm->lf.filter_level; - memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS); - memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); - dst_cm->seg = src_cm->seg; - memcpy(dst_cm->frame_contexts, src_cm->frame_contexts, - FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0])); -#else - (void) dst_worker; - (void) src_worker; -#endif // CONFIG_MULTITHREAD -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.h b/thirdparty/libvpx/vp9/decoder/vp9_dthread.h deleted file mode 100644 index ba7c38a511..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dthread.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_DTHREAD_H_ -#define VP9_DECODER_VP9_DTHREAD_H_ - -#include "./vpx_config.h" -#include "vpx_util/vpx_thread.h" -#include "vpx/internal/vpx_codec_internal.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct VP9Decoder; - -// WorkerData for the FrameWorker thread. It contains all the information of -// the worker and decode structures for decoding a frame. -typedef struct FrameWorkerData { - struct VP9Decoder *pbi; - const uint8_t *data; - const uint8_t *data_end; - size_t data_size; - void *user_priv; - int result; - int worker_id; - int received_frame; - - // scratch_buffer is used in frame parallel mode only. - // It is used to make a copy of the compressed data. - uint8_t *scratch_buffer; - size_t scratch_buffer_size; - -#if CONFIG_MULTITHREAD - pthread_mutex_t stats_mutex; - pthread_cond_t stats_cond; -#endif - - int frame_context_ready; // Current frame's context is ready to read. - int frame_decoded; // Finished decoding current frame. -} FrameWorkerData; - -void vp9_frameworker_lock_stats(VPxWorker *const worker); -void vp9_frameworker_unlock_stats(VPxWorker *const worker); -void vp9_frameworker_signal_stats(VPxWorker *const worker); - -// Wait until ref_buf has been decoded to row in real pixel unit. -// Note: worker may already finish decoding ref_buf and release it in order to -// start decoding next frame. So need to check whether worker is still decoding -// ref_buf. -void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, - int row); - -// FrameWorker broadcasts its decoding progress so other workers that are -// waiting on it can resume decoding. -void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row); - -// Copy necessary decoding context from src worker to dst worker. -void vp9_frameworker_copy_context(VPxWorker *const dst_worker, - VPxWorker *const src_worker); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.c b/thirdparty/libvpx/vp9/vp9_dx_iface.c deleted file mode 100644 index 6531e2c618..0000000000 --- a/thirdparty/libvpx/vp9/vp9_dx_iface.c +++ /dev/null @@ -1,1093 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vpx_config.h" -#include "./vpx_version.h" - -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_decoder.h" -#include "vpx_dsp/bitreader_buffer.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_frame_buffers.h" - -#include "vp9/decoder/vp9_decodeframe.h" - -#include "vp9/vp9_dx_iface.h" -#include "vp9/vp9_iface_common.h" - -#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) - -static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - // This function only allocates space for the vpx_codec_alg_priv_t - // structure. More memory may be required at the time the stream - // information becomes known. - (void)data; - - if (!ctx->priv) { - vpx_codec_alg_priv_t *const priv = - (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); - if (priv == NULL) - return VPX_CODEC_MEM_ERROR; - - ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->init_flags = ctx->init_flags; - priv->si.sz = sizeof(priv->si); - priv->flushed = 0; - // Only do frame parallel decode when threads > 1. - priv->frame_parallel_decode = - (ctx->config.dec && (ctx->config.dec->threads > 1) && - (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING)) ? 1 : 0; - if (ctx->config.dec) { - priv->cfg = *ctx->config.dec; - ctx->config.dec = &priv->cfg; - } - } - - return VPX_CODEC_OK; -} - -static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { - if (ctx->frame_workers != NULL) { - int i; - for (i = 0; i < ctx->num_frame_workers; ++i) { - VPxWorker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - vpx_get_worker_interface()->end(worker); - vp9_remove_common(&frame_worker_data->pbi->common); -#if CONFIG_VP9_POSTPROC - vp9_free_postproc_buffers(&frame_worker_data->pbi->common); -#endif - vp9_decoder_remove(frame_worker_data->pbi); - vpx_free(frame_worker_data->scratch_buffer); -#if CONFIG_MULTITHREAD - pthread_mutex_destroy(&frame_worker_data->stats_mutex); - pthread_cond_destroy(&frame_worker_data->stats_cond); -#endif - vpx_free(frame_worker_data); - } -#if CONFIG_MULTITHREAD - pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex); -#endif - } - - if (ctx->buffer_pool) { - vp9_free_ref_frame_buffers(ctx->buffer_pool); - vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); - } - - vpx_free(ctx->frame_workers); - vpx_free(ctx->buffer_pool); - vpx_free(ctx); - return VPX_CODEC_OK; -} - -static int parse_bitdepth_colorspace_sampling( - BITSTREAM_PROFILE profile, struct vpx_read_bit_buffer *rb) { - vpx_color_space_t color_space; - if (profile >= PROFILE_2) - rb->bit_offset += 1; // Bit-depth 10 or 12. - color_space = (vpx_color_space_t)vpx_rb_read_literal(rb, 3); - if (color_space != VPX_CS_SRGB) { - rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range. - if (profile == PROFILE_1 || profile == PROFILE_3) { - rb->bit_offset += 2; // subsampling x/y. - rb->bit_offset += 1; // unused. - } - } else { - if (profile == PROFILE_1 || profile == PROFILE_3) { - rb->bit_offset += 1; // unused - } else { - // RGB is only available in version 1. - return 0; - } - } - return 1; -} - -static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si, - int *is_intra_only, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - int intra_only_flag = 0; - uint8_t clear_buffer[10]; - - if (data + data_sz <= data) - return VPX_CODEC_INVALID_PARAM; - - si->is_kf = 0; - si->w = si->h = 0; - - if (decrypt_cb) { - data_sz = VPXMIN(sizeof(clear_buffer), data_sz); - decrypt_cb(decrypt_state, data, clear_buffer, data_sz); - data = clear_buffer; - } - - // A maximum of 6 bits are needed to read the frame marker, profile and - // show_existing_frame. - if (data_sz < 1) - return VPX_CODEC_UNSUP_BITSTREAM; - - { - int show_frame; - int error_resilient; - struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL }; - const int frame_marker = vpx_rb_read_literal(&rb, 2); - const BITSTREAM_PROFILE profile = vp9_read_profile(&rb); - - if (frame_marker != VP9_FRAME_MARKER) - return VPX_CODEC_UNSUP_BITSTREAM; - - if (profile >= MAX_PROFILES) - return VPX_CODEC_UNSUP_BITSTREAM; - - if (vpx_rb_read_bit(&rb)) { // show an existing frame - // If profile is > 2 and show_existing_frame is true, then at least 1 more - // byte (6+3=9 bits) is needed. - if (profile > 2 && data_sz < 2) - return VPX_CODEC_UNSUP_BITSTREAM; - vpx_rb_read_literal(&rb, 3); // Frame buffer to show. - return VPX_CODEC_OK; - } - - // For the rest of the function, a maximum of 9 more bytes are needed - // (computed by taking the maximum possible bits needed in each case). Note - // that this has to be updated if we read any more bits in this function. - if (data_sz < 10) - return VPX_CODEC_UNSUP_BITSTREAM; - - si->is_kf = !vpx_rb_read_bit(&rb); - show_frame = vpx_rb_read_bit(&rb); - error_resilient = vpx_rb_read_bit(&rb); - - if (si->is_kf) { - if (!vp9_read_sync_code(&rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - - if (!parse_bitdepth_colorspace_sampling(profile, &rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); - } else { - intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb); - - rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context - - if (intra_only_flag) { - if (!vp9_read_sync_code(&rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - if (profile > PROFILE_0) { - if (!parse_bitdepth_colorspace_sampling(profile, &rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - } - rb.bit_offset += REF_FRAMES; // refresh_frame_flags - vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); - } - } - } - if (is_intra_only != NULL) - *is_intra_only = intra_only_flag; - return VPX_CODEC_OK; -} - -static vpx_codec_err_t decoder_peek_si(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) { - return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL); -} - -static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) { - const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) - ? sizeof(vp9_stream_info_t) - : sizeof(vpx_codec_stream_info_t); - memcpy(si, &ctx->si, sz); - si->sz = (unsigned int)sz; - - return VPX_CODEC_OK; -} - -static void set_error_detail(vpx_codec_alg_priv_t *ctx, - const char *const error) { - ctx->base.err_detail = error; -} - -static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) { - if (error->error_code) - set_error_detail(ctx, error->has_detail ? error->detail : NULL); - - return error->error_code; -} - -static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { - int i; - - for (i = 0; i < ctx->num_frame_workers; ++i) { - VPxWorker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - VP9_COMMON *const cm = &frame_worker_data->pbi->common; - BufferPool *const pool = cm->buffer_pool; - - cm->new_fb_idx = INVALID_IDX; - cm->byte_alignment = ctx->byte_alignment; - cm->skip_loop_filter = ctx->skip_loop_filter; - - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - pool->get_fb_cb = ctx->get_ext_fb_cb; - pool->release_fb_cb = ctx->release_ext_fb_cb; - pool->cb_priv = ctx->ext_priv; - } else { - pool->get_fb_cb = vp9_get_frame_buffer; - pool->release_fb_cb = vp9_release_frame_buffer; - - if (vp9_alloc_internal_frame_buffers(&pool->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); - - pool->cb_priv = &pool->int_frame_buffers; - } - } -} - -static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { - cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; - cfg->deblocking_level = 4; - cfg->noise_level = 0; -} - -static void set_ppflags(const vpx_codec_alg_priv_t *ctx, - vp9_ppflags_t *flags) { - flags->post_proc_flag = - ctx->postproc_cfg.post_proc_flag; - - flags->deblocking_level = ctx->postproc_cfg.deblocking_level; - flags->noise_level = ctx->postproc_cfg.noise_level; -} - -static int frame_worker_hook(void *arg1, void *arg2) { - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; - const uint8_t *data = frame_worker_data->data; - (void)arg2; - - frame_worker_data->result = - vp9_receive_compressed_data(frame_worker_data->pbi, - frame_worker_data->data_size, - &data); - frame_worker_data->data_end = data; - - if (frame_worker_data->pbi->frame_parallel_decode) { - // In frame parallel decoding, a worker thread must successfully decode all - // the compressed data. - if (frame_worker_data->result != 0 || - frame_worker_data->data + frame_worker_data->data_size - 1 > data) { - VPxWorker *const worker = frame_worker_data->pbi->frame_worker_owner; - BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool; - // Signal all the other threads that are waiting for this frame. - vp9_frameworker_lock_stats(worker); - frame_worker_data->frame_context_ready = 1; - lock_buffer_pool(pool); - frame_worker_data->pbi->cur_buf->buf.corrupted = 1; - unlock_buffer_pool(pool); - frame_worker_data->pbi->need_resync = 1; - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); - return 0; - } - } else if (frame_worker_data->result != 0) { - // Check decode result in serial decode. - frame_worker_data->pbi->cur_buf->buf.corrupted = 1; - frame_worker_data->pbi->need_resync = 1; - } - return !frame_worker_data->result; -} - -static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { - int i; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - - ctx->last_show_frame = -1; - ctx->next_submit_worker_id = 0; - ctx->last_submit_worker_id = 0; - ctx->next_output_worker_id = 0; - ctx->frame_cache_read = 0; - ctx->frame_cache_write = 0; - ctx->num_cache_frames = 0; - ctx->need_resync = 1; - ctx->num_frame_workers = - (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1; - if (ctx->num_frame_workers > MAX_DECODE_THREADS) - ctx->num_frame_workers = MAX_DECODE_THREADS; - ctx->available_threads = ctx->num_frame_workers; - ctx->flushed = 0; - - ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); - if (ctx->buffer_pool == NULL) - return VPX_CODEC_MEM_ERROR; - -#if CONFIG_MULTITHREAD - if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) { - set_error_detail(ctx, "Failed to allocate buffer pool mutex"); - return VPX_CODEC_MEM_ERROR; - } -#endif - - ctx->frame_workers = (VPxWorker *) - vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); - if (ctx->frame_workers == NULL) { - set_error_detail(ctx, "Failed to allocate frame_workers"); - return VPX_CODEC_MEM_ERROR; - } - - for (i = 0; i < ctx->num_frame_workers; ++i) { - VPxWorker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *frame_worker_data = NULL; - winterface->init(worker); - worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); - if (worker->data1 == NULL) { - set_error_detail(ctx, "Failed to allocate frame_worker_data"); - return VPX_CODEC_MEM_ERROR; - } - frame_worker_data = (FrameWorkerData *)worker->data1; - frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); - if (frame_worker_data->pbi == NULL) { - set_error_detail(ctx, "Failed to allocate frame_worker_data"); - return VPX_CODEC_MEM_ERROR; - } - frame_worker_data->pbi->frame_worker_owner = worker; - frame_worker_data->worker_id = i; - frame_worker_data->scratch_buffer = NULL; - frame_worker_data->scratch_buffer_size = 0; - frame_worker_data->frame_context_ready = 0; - frame_worker_data->received_frame = 0; -#if CONFIG_MULTITHREAD - if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) { - set_error_detail(ctx, "Failed to allocate frame_worker_data mutex"); - return VPX_CODEC_MEM_ERROR; - } - - if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) { - set_error_detail(ctx, "Failed to allocate frame_worker_data cond"); - return VPX_CODEC_MEM_ERROR; - } -#endif - // If decoding in serial mode, FrameWorker thread could create tile worker - // thread or loopfilter thread. - frame_worker_data->pbi->max_threads = - (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; - - frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order; - frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; - frame_worker_data->pbi->common.frame_parallel_decode = - ctx->frame_parallel_decode; - worker->hook = (VPxWorkerHook)frame_worker_hook; - if (!winterface->reset(worker)) { - set_error_detail(ctx, "Frame Worker thread creation failed"); - return VPX_CODEC_MEM_ERROR; - } - } - - // If postprocessing was enabled by the application and a - // configuration has not been provided, default it. - if (!ctx->postproc_cfg_set && - (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) - set_default_ppflags(&ctx->postproc_cfg); - - init_buffer_callbacks(ctx); - - return VPX_CODEC_OK; -} - -static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, - const VP9Decoder *const pbi) { - // Clear resync flag if worker got a key frame or intra only frame. - if (ctx->need_resync == 1 && pbi->need_resync == 0 && - (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME)) - ctx->need_resync = 0; -} - -static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, - const uint8_t **data, unsigned int data_sz, - void *user_priv, int64_t deadline) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - (void)deadline; - - // Determine the stream parameters. Note that we rely on peek_si to - // validate that we have a buffer that does not wrap around the top - // of the heap. - if (!ctx->si.h) { - int is_intra_only = 0; - const vpx_codec_err_t res = - decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only, - ctx->decrypt_cb, ctx->decrypt_state); - if (res != VPX_CODEC_OK) - return res; - - if (!ctx->si.is_kf && !is_intra_only) - return VPX_CODEC_ERROR; - } - - if (!ctx->frame_parallel_decode) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - frame_worker_data->data = *data; - frame_worker_data->data_size = data_sz; - frame_worker_data->user_priv = user_priv; - frame_worker_data->received_frame = 1; - - // Set these even if already initialized. The caller may have changed the - // decrypt config between frames. - frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb; - frame_worker_data->pbi->decrypt_state = ctx->decrypt_state; - - worker->had_error = 0; - winterface->execute(worker); - - // Update data pointer after decode. - *data = frame_worker_data->data_end; - - if (worker->had_error) - return update_error_state(ctx, &frame_worker_data->pbi->common.error); - - check_resync(ctx, frame_worker_data->pbi); - } else { - VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id]; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - // Copy context from last worker thread to next worker thread. - if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) - vp9_frameworker_copy_context( - &ctx->frame_workers[ctx->next_submit_worker_id], - &ctx->frame_workers[ctx->last_submit_worker_id]); - - frame_worker_data->pbi->ready_for_new_data = 0; - // Copy the compressed data into worker's internal buffer. - // TODO(hkuang): Will all the workers allocate the same size - // as the size of the first intra frame be better? This will - // avoid too many deallocate and allocate. - if (frame_worker_data->scratch_buffer_size < data_sz) { - frame_worker_data->scratch_buffer = - (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz); - if (frame_worker_data->scratch_buffer == NULL) { - set_error_detail(ctx, "Failed to reallocate scratch buffer"); - return VPX_CODEC_MEM_ERROR; - } - frame_worker_data->scratch_buffer_size = data_sz; - } - frame_worker_data->data_size = data_sz; - memcpy(frame_worker_data->scratch_buffer, *data, data_sz); - - frame_worker_data->frame_decoded = 0; - frame_worker_data->frame_context_ready = 0; - frame_worker_data->received_frame = 1; - frame_worker_data->data = frame_worker_data->scratch_buffer; - frame_worker_data->user_priv = user_priv; - - if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) - ctx->last_submit_worker_id = - (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers; - - ctx->next_submit_worker_id = - (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers; - --ctx->available_threads; - worker->had_error = 0; - winterface->launch(worker); - } - - return VPX_CODEC_OK; -} - -static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { - YV12_BUFFER_CONFIG sd; - vp9_ppflags_t flags = {0, 0, 0}; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - ctx->next_output_worker_id = - (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; - // TODO(hkuang): Add worker error handling here. - winterface->sync(worker); - frame_worker_data->received_frame = 0; - ++ctx->available_threads; - - check_resync(ctx, frame_worker_data->pbi); - - if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { - VP9_COMMON *const cm = &frame_worker_data->pbi->common; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; - yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd, - frame_worker_data->user_priv); - ctx->frame_cache[ctx->frame_cache_write].img.fb_priv = - frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->frame_cache_write = - (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE; - ++ctx->num_cache_frames; - } -} - -static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, unsigned int data_sz, - void *user_priv, long deadline) { - const uint8_t *data_start = data; - const uint8_t * const data_end = data + data_sz; - vpx_codec_err_t res; - uint32_t frame_sizes[8]; - int frame_count; - - if (data == NULL && data_sz == 0) { - ctx->flushed = 1; - return VPX_CODEC_OK; - } - - // Reset flushed when receiving a valid frame. - ctx->flushed = 0; - - // Initialize the decoder workers on the first frame. - if (ctx->frame_workers == NULL) { - const vpx_codec_err_t res = init_decoder(ctx); - if (res != VPX_CODEC_OK) - return res; - } - - res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count, - ctx->decrypt_cb, ctx->decrypt_state); - if (res != VPX_CODEC_OK) - return res; - - if (ctx->frame_parallel_decode) { - // Decode in frame parallel mode. When decoding in this mode, the frame - // passed to the decoder must be either a normal frame or a superframe with - // superframe index so the decoder could get each frame's start position - // in the superframe. - if (frame_count > 0) { - int i; - - for (i = 0; i < frame_count; ++i) { - const uint8_t *data_start_copy = data_start; - const uint32_t frame_size = frame_sizes[i]; - if (data_start < data - || frame_size > (uint32_t) (data_end - data_start)) { - set_error_detail(ctx, "Invalid frame size in index"); - return VPX_CODEC_CORRUPT_FRAME; - } - - if (ctx->available_threads == 0) { - // No more threads for decoding. Wait until the next output worker - // finishes decoding. Then copy the decoded frame into cache. - if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { - wait_worker_and_cache_frame(ctx); - } else { - // TODO(hkuang): Add unit test to test this path. - set_error_detail(ctx, "Frame output cache is full."); - return VPX_CODEC_ERROR; - } - } - - res = decode_one(ctx, &data_start_copy, frame_size, user_priv, - deadline); - if (res != VPX_CODEC_OK) - return res; - data_start += frame_size; - } - } else { - if (ctx->available_threads == 0) { - // No more threads for decoding. Wait until the next output worker - // finishes decoding. Then copy the decoded frame into cache. - if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { - wait_worker_and_cache_frame(ctx); - } else { - // TODO(hkuang): Add unit test to test this path. - set_error_detail(ctx, "Frame output cache is full."); - return VPX_CODEC_ERROR; - } - } - - res = decode_one(ctx, &data, data_sz, user_priv, deadline); - if (res != VPX_CODEC_OK) - return res; - } - } else { - // Decode in serial mode. - if (frame_count > 0) { - int i; - - for (i = 0; i < frame_count; ++i) { - const uint8_t *data_start_copy = data_start; - const uint32_t frame_size = frame_sizes[i]; - vpx_codec_err_t res; - if (data_start < data - || frame_size > (uint32_t) (data_end - data_start)) { - set_error_detail(ctx, "Invalid frame size in index"); - return VPX_CODEC_CORRUPT_FRAME; - } - - res = decode_one(ctx, &data_start_copy, frame_size, user_priv, - deadline); - if (res != VPX_CODEC_OK) - return res; - - data_start += frame_size; - } - } else { - while (data_start < data_end) { - const uint32_t frame_size = (uint32_t) (data_end - data_start); - const vpx_codec_err_t res = decode_one(ctx, &data_start, frame_size, - user_priv, deadline); - if (res != VPX_CODEC_OK) - return res; - - // Account for suboptimal termination by the encoder. - while (data_start < data_end) { - const uint8_t marker = read_marker(ctx->decrypt_cb, - ctx->decrypt_state, data_start); - if (marker) - break; - ++data_start; - } - } - } - } - - return res; -} - -static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) { - RefCntBuffer *const frame_bufs = ctx->buffer_pool->frame_bufs; - // Decrease reference count of last output frame in frame parallel mode. - if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { - BufferPool *const pool = ctx->buffer_pool; - lock_buffer_pool(pool); - decrease_ref_count(ctx->last_show_frame, frame_bufs, pool); - unlock_buffer_pool(pool); - } -} - -static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { - vpx_image_t *img = NULL; - - // Only return frame when all the cpu are busy or - // application fluhsed the decoder in frame parallel decode. - if (ctx->frame_parallel_decode && ctx->available_threads > 0 && - !ctx->flushed) { - return NULL; - } - - // Output the frames in the cache first. - if (ctx->num_cache_frames > 0) { - release_last_output_frame(ctx); - ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx; - if (ctx->need_resync) - return NULL; - img = &ctx->frame_cache[ctx->frame_cache_read].img; - ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE; - --ctx->num_cache_frames; - return img; - } - - // iter acts as a flip flop, so an image is only returned on the first - // call to get_frame. - if (*iter == NULL && ctx->frame_workers != NULL) { - do { - YV12_BUFFER_CONFIG sd; - vp9_ppflags_t flags = {0, 0, 0}; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - VPxWorker *const worker = - &ctx->frame_workers[ctx->next_output_worker_id]; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - ctx->next_output_worker_id = - (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); - // Wait for the frame from worker thread. - if (winterface->sync(worker)) { - // Check if worker has received any frames. - if (frame_worker_data->received_frame == 1) { - ++ctx->available_threads; - frame_worker_data->received_frame = 0; - check_resync(ctx, frame_worker_data->pbi); - } - if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { - VP9_COMMON *const cm = &frame_worker_data->pbi->common; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - release_last_output_frame(ctx); - ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx; - if (ctx->need_resync) - return NULL; - yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv); - ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - img = &ctx->img; - return img; - } - } else { - // Decoding failed. Release the worker thread. - frame_worker_data->received_frame = 0; - ++ctx->available_threads; - ctx->need_resync = 1; - if (ctx->flushed != 1) - return NULL; - } - } while (ctx->next_output_worker_id != ctx->next_submit_worker_id); - } - return NULL; -} - -static vpx_codec_err_t decoder_set_fb_fn( - vpx_codec_alg_priv_t *ctx, - vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { - if (cb_get == NULL || cb_release == NULL) { - return VPX_CODEC_INVALID_PARAM; - } else if (ctx->frame_workers == NULL) { - // If the decoder has already been initialized, do not accept changes to - // the frame buffer functions. - ctx->get_ext_fb_cb = cb_get; - ctx->release_ext_fb_cb = cb_release; - ctx->ext_priv = cb_priv; - return VPX_CODEC_OK; - } - - return VPX_CODEC_ERROR; -} - -static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (data) { - vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; - YV12_BUFFER_CONFIG sd; - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(&frame_worker_data->pbi->common, - (VP9_REFFRAME)frame->frame_type, &sd); - } else { - return VPX_CODEC_INVALID_PARAM; - } -} - -static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; - YV12_BUFFER_CONFIG sd; - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - image2yuvconfig(&frame->img, &sd); - return vp9_copy_reference_dec(frame_worker_data->pbi, - (VP9_REFFRAME)frame->frame_type, &sd); - } else { - return VPX_CODEC_INVALID_PARAM; - } -} - -static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (data) { - YV12_BUFFER_CONFIG* fb; - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - fb = get_ref_frame(&frame_worker_data->pbi->common, data->idx); - if (fb == NULL) return VPX_CODEC_ERROR; - yuvconfig2image(&data->img, fb, NULL); - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -} - -static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_VP9_POSTPROC - vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - - if (data) { - ctx->postproc_cfg_set = 1; - ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, - va_list args) { - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -} - -static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const update_info = va_arg(args, int *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (update_info) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - *update_info = frame_worker_data->pbi->refresh_frame_flags; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *corrupted = va_arg(args, int *); - - if (corrupted) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - RefCntBuffer *const frame_bufs = - frame_worker_data->pbi->common.buffer_pool->frame_bufs; - if (frame_worker_data->pbi->common.frame_to_show == NULL) - return VPX_CODEC_ERROR; - if (ctx->last_show_frame >= 0) - *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const frame_size = va_arg(args, int *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (frame_size) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - frame_size[0] = cm->width; - frame_size[1] = cm->height; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const render_size = va_arg(args, int *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (render_size) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - render_size[0] = cm->render_width; - render_size[1] = cm->render_height; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx, - va_list args) { - unsigned int *const bit_depth = va_arg(args, unsigned int *); - VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; - - if (bit_depth) { - if (worker) { - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - *bit_depth = cm->bit_depth; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, - va_list args) { - ctx->invert_tile_order = va_arg(args, int); - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx, - va_list args) { - vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); - ctx->decrypt_cb = init ? init->decrypt_cb : NULL; - ctx->decrypt_state = init ? init->decrypt_state : NULL; - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, - va_list args) { - const int legacy_byte_alignment = 0; - const int min_byte_alignment = 32; - const int max_byte_alignment = 1024; - const int byte_alignment = va_arg(args, int); - - if (byte_alignment != legacy_byte_alignment && - (byte_alignment < min_byte_alignment || - byte_alignment > max_byte_alignment || - (byte_alignment & (byte_alignment - 1)) != 0)) - return VPX_CODEC_INVALID_PARAM; - - ctx->byte_alignment = byte_alignment; - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - frame_worker_data->pbi->common.byte_alignment = byte_alignment; - } - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, - va_list args) { - ctx->skip_loop_filter = va_arg(args, int); - - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter; - } - - return VPX_CODEC_OK; -} - -static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { - {VP8_COPY_REFERENCE, ctrl_copy_reference}, - - // Setters - {VP8_SET_REFERENCE, ctrl_set_reference}, - {VP8_SET_POSTPROC, ctrl_set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, - {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, - {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, - {VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, - {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, - {VPXD_SET_DECRYPTOR, ctrl_set_decryptor}, - {VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment}, - {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter}, - - // Getters - {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, - {VP9_GET_REFERENCE, ctrl_get_reference}, - {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, - {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, - {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, - - { -1, NULL}, -}; - -#ifndef VERSION_STRING -#define VERSION_STRING -#endif -CODEC_INTERFACE(vpx_codec_vp9_dx) = { - "WebM Project VP9 Decoder" VERSION_STRING, - VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | - VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t - decoder_init, // vpx_codec_init_fn_t - decoder_destroy, // vpx_codec_destroy_fn_t - decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t - { // NOLINT - decoder_peek_si, // vpx_codec_peek_si_fn_t - decoder_get_si, // vpx_codec_get_si_fn_t - decoder_decode, // vpx_codec_decode_fn_t - decoder_get_frame, // vpx_codec_frame_get_fn_t - decoder_set_fb_fn, // vpx_codec_set_fb_fn_t - }, - { // NOLINT - 0, - NULL, // vpx_codec_enc_cfg_map_t - NULL, // vpx_codec_encode_fn_t - NULL, // vpx_codec_get_cx_data_fn_t - NULL, // vpx_codec_enc_config_set_fn_t - NULL, // vpx_codec_get_global_headers_fn_t - NULL, // vpx_codec_get_preview_frame_fn_t - NULL // vpx_codec_enc_mr_get_mem_loc_fn_t - } -}; diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.h b/thirdparty/libvpx/vp9/vp9_dx_iface.h deleted file mode 100644 index e0e948e16c..0000000000 --- a/thirdparty/libvpx/vp9/vp9_dx_iface.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_VP9_DX_IFACE_H_ -#define VP9_VP9_DX_IFACE_H_ - -#include "vp9/decoder/vp9_decoder.h" - -typedef vpx_codec_stream_info_t vp9_stream_info_t; - -// This limit is due to framebuffer numbers. -// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. -#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. - -typedef struct cache_frame { - int fb_idx; - vpx_image_t img; -} cache_frame; - -struct vpx_codec_alg_priv { - vpx_codec_priv_t base; - vpx_codec_dec_cfg_t cfg; - vp9_stream_info_t si; - int postproc_cfg_set; - vp8_postproc_cfg_t postproc_cfg; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - vpx_image_t img; - int img_avail; - int flushed; - int invert_tile_order; - int last_show_frame; // Index of last output frame. - int byte_alignment; - int skip_loop_filter; - - // Frame parallel related. - int frame_parallel_decode; // frame-based threading. - VPxWorker *frame_workers; - int num_frame_workers; - int next_submit_worker_id; - int last_submit_worker_id; - int next_output_worker_id; - int available_threads; - cache_frame frame_cache[FRAME_CACHE_SIZE]; - int frame_cache_write; - int frame_cache_read; - int num_cache_frames; - int need_resync; // wait for key/intra-only frame - // BufferPool that holds all reference frames. Shared by all the FrameWorkers. - BufferPool *buffer_pool; - - // External frame buffer info to save for VP9 common. - void *ext_priv; // Private data associated with the external frame buffers. - vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; -}; - -#endif // VP9_VP9_DX_IFACE_H_ diff --git a/thirdparty/libvpx/vp9/vp9_iface_common.h b/thirdparty/libvpx/vp9/vp9_iface_common.h deleted file mode 100644 index 938d4224ba..0000000000 --- a/thirdparty/libvpx/vp9/vp9_iface_common.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VP9_VP9_IFACE_COMMON_H_ -#define VP9_VP9_IFACE_COMMON_H_ - -#include "vpx_ports/mem.h" - -static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, - void *user_priv) { - /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ - int bps; - if (!yv12->subsampling_y) { - if (!yv12->subsampling_x) { - img->fmt = VPX_IMG_FMT_I444; - bps = 24; - } else { - img->fmt = VPX_IMG_FMT_I422; - bps = 16; - } - } else { - if (!yv12->subsampling_x) { - img->fmt = VPX_IMG_FMT_I440; - bps = 16; - } else { - img->fmt = VPX_IMG_FMT_I420; - bps = 12; - } - } - img->cs = yv12->color_space; - img->range = yv12->color_range; - img->bit_depth = 8; - img->w = yv12->y_stride; - img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); - img->d_w = yv12->y_crop_width; - img->d_h = yv12->y_crop_height; - img->r_w = yv12->render_width; - img->r_h = yv12->render_height; - img->x_chroma_shift = yv12->subsampling_x; - img->y_chroma_shift = yv12->subsampling_y; - img->planes[VPX_PLANE_Y] = yv12->y_buffer; - img->planes[VPX_PLANE_U] = yv12->u_buffer; - img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = yv12->y_stride; - img->stride[VPX_PLANE_U] = yv12->uv_stride; - img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; -#if CONFIG_VP9_HIGHBITDEPTH - if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { - // vpx_image_t uses byte strides and a pointer to the first byte - // of the image. - img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); - img->bit_depth = yv12->bit_depth; - img->planes[VPX_PLANE_Y] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->y_buffer); - img->planes[VPX_PLANE_U] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->u_buffer); - img->planes[VPX_PLANE_V] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->v_buffer); - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; - img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - img->bps = bps; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} - -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) { - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = img->d_w; - yv12->y_crop_height = img->d_h; - yv12->render_width = img->r_w; - yv12->render_height = img->r_h; - yv12->y_width = img->d_w; - yv12->y_height = img->d_h; - - yv12->uv_width = img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 - : yv12->y_width; - yv12->uv_height = img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 - : yv12->y_height; - yv12->uv_crop_width = yv12->uv_width; - yv12->uv_crop_height = yv12->uv_height; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - yv12->color_space = img->cs; - yv12->color_range = img->range; - -#if CONFIG_VP9_HIGHBITDEPTH - if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { - // In vpx_image_t - // planes point to uint8 address of start of data - // stride counts uint8s to reach next row - // In YV12_BUFFER_CONFIG - // y_buffer, u_buffer, v_buffer point to uint16 address of data - // stride and border counts in uint16s - // This means that all the address calculations in the main body of code - // should work correctly. - // However, before we do any pixel operations we need to cast the address - // to a uint16 ponter and double its value. - yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); - yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); - yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); - yv12->y_stride >>= 1; - yv12->uv_stride >>= 1; - yv12->flags = YV12_FLAG_HIGHBITDEPTH; - } else { - yv12->flags = 0; - } - yv12->border = (yv12->y_stride - img->w) / 2; -#else - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; -#endif // CONFIG_VP9_HIGHBITDEPTH - yv12->subsampling_x = img->x_chroma_shift; - yv12->subsampling_y = img->y_chroma_shift; - return VPX_CODEC_OK; -} - -#endif // VP9_VP9_IFACE_COMMON_H_ diff --git a/thirdparty/libvpx/vp9_rtcd.h b/thirdparty/libvpx/vp9_rtcd.h deleted file mode 100644 index cf2b463d63..0000000000 --- a/thirdparty/libvpx/vp9_rtcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - #include "rtcd/vp9_rtcd_x86.h" -#elif defined(WEBM_ARMASM) && ARCH_ARM - #include "rtcd/vp9_rtcd_arm.h" -#else - #include "rtcd/vp9_rtcd_c.h" -#endif diff --git a/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h b/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h deleted file mode 100644 index 7380fcc7e2..0000000000 --- a/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Describes the decoder algorithm interface for algorithm - * implementations. - * - * This file defines the private structures and data types that are only - * relevant to implementing an algorithm, as opposed to using it. - * - * To create a decoder algorithm class, an interface structure is put - * into the global namespace: - *
- *     my_codec.c:
- *       vpx_codec_iface_t my_codec = {
- *           "My Codec v1.0",
- *           VPX_CODEC_ALG_ABI_VERSION,
- *           ...
- *       };
- *     
- * - * An application instantiates a specific decoder instance by using - * vpx_codec_init() and a pointer to the algorithm's interface structure: - *
- *     my_app.c:
- *       extern vpx_codec_iface_t my_codec;
- *       {
- *           vpx_codec_ctx_t algo;
- *           res = vpx_codec_init(&algo, &my_codec);
- *       }
- *     
- * - * Once initialized, the instance is manged using other functions from - * the vpx_codec_* family. - */ -#ifndef VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ -#define VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ -#include "../vpx_decoder.h" -#include "../vpx_encoder.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/ - -typedef struct vpx_codec_alg_priv vpx_codec_alg_priv_t; -typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t; - -/*!\brief init function pointer prototype - * - * Performs algorithm-specific initialization of the decoder context. This - * function is called by the generic vpx_codec_init() wrapper function, so - * plugins implementing this interface may trust the input parameters to be - * properly initialized. - * - * \param[in] ctx Pointer to this instance's context - * \retval #VPX_CODEC_OK - * The input stream was recognized and decoder initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory operation failed. - */ -typedef vpx_codec_err_t (*vpx_codec_init_fn_t)(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data); - -/*!\brief destroy function pointer prototype - * - * Performs algorithm-specific destruction of the decoder context. This - * function is called by the generic vpx_codec_destroy() wrapper function, - * so plugins implementing this interface may trust the input parameters - * to be properly initialized. - * - * \param[in] ctx Pointer to this instance's context - * \retval #VPX_CODEC_OK - * The input stream was recognized and decoder initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory operation failed. - */ -typedef vpx_codec_err_t (*vpx_codec_destroy_fn_t)(vpx_codec_alg_priv_t *ctx); - -/*!\brief parse stream info function pointer prototype - * - * Performs high level parsing of the bitstream. This function is called by the - * generic vpx_codec_peek_stream_info() wrapper function, so plugins - * implementing this interface may trust the input parameters to be properly - * initialized. - * - * \param[in] data Pointer to a block of data to parse - * \param[in] data_sz Size of the data buffer - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ -typedef vpx_codec_err_t (*vpx_codec_peek_si_fn_t)(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si); - -/*!\brief Return information about the current stream. - * - * Returns information about the stream that has been parsed during decoding. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ -typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si); - -/*!\brief control function pointer prototype - * - * This function is used to exchange algorithm specific data with the decoder - * instance. This can be used to implement features specific to a particular - * algorithm. - * - * This function is called by the generic vpx_codec_control() wrapper - * function, so plugins implementing this interface may trust the input - * parameters to be properly initialized. However, this interface does not - * provide type safety for the exchanged data or assign meanings to the - * control codes. Those details should be specified in the algorithm's - * header file. In particular, the ctrl_id parameter is guaranteed to exist - * in the algorithm's control mapping table, and the data parameter may be NULL. - * - * - * \param[in] ctx Pointer to this instance's context - * \param[in] ctrl_id Algorithm specific control identifier - * \param[in,out] data Data to exchange with algorithm instance. - * - * \retval #VPX_CODEC_OK - * The internal state data was deserialized. - */ -typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx, - va_list ap); - -/*!\brief control function pointer mapping - * - * This structure stores the mapping between control identifiers and - * implementing functions. Each algorithm provides a list of these - * mappings. This list is searched by the vpx_codec_control() wrapper - * function to determine which function to invoke. The special - * value {0, NULL} is used to indicate end-of-list, and must be - * present. The special value {0, } can be used as a catch-all - * mapping. This implies that ctrl_id values chosen by the algorithm - * \ref MUST be non-zero. - */ -typedef const struct vpx_codec_ctrl_fn_map { - int ctrl_id; - vpx_codec_control_fn_t fn; -} vpx_codec_ctrl_fn_map_t; - -/*!\brief decode data function pointer prototype - * - * Processes a buffer of coded data. If the processing results in a new - * decoded frame becoming available, #VPX_CODEC_CB_PUT_SLICE and - * #VPX_CODEC_CB_PUT_FRAME events are generated as appropriate. This - * function is called by the generic vpx_codec_decode() wrapper function, - * so plugins implementing this interface may trust the input parameters - * to be properly initialized. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] data Pointer to this block of new coded data. If - * NULL, a #VPX_CODEC_CB_PUT_FRAME event is posted - * for the previously decoded frame. - * \param[in] data_sz Size of the coded data, in bytes. - * - * \return Returns #VPX_CODEC_OK if the coded data was processed completely - * and future pictures can be decoded without error. Otherwise, - * see the descriptions of the other error codes in ::vpx_codec_err_t - * for recoverability capabilities. - */ -typedef vpx_codec_err_t (*vpx_codec_decode_fn_t)(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline); - -/*!\brief Decoded frames iterator - * - * Iterates over a list of the frames available for display. The iterator - * storage should be initialized to NULL to start the iteration. Iteration is - * complete when this function returns NULL. - * - * The list of available frames becomes valid upon completion of the - * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode. - * - * \param[in] ctx Pointer to this instance's context - * \param[in out] iter Iterator storage, initialized to NULL - * - * \return Returns a pointer to an image, if one is ready for display. Frames - * produced will always be in PTS (presentation time stamp) order. - */ -typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter); - -/*!\brief Pass in external frame buffers for the decoder to use. - * - * Registers functions to be called when libvpx needs a frame buffer - * to decode the current frame and a function to be called when libvpx does - * not internally reference the frame buffer. This set function must - * be called before the first call to decode or libvpx will assume the - * default behavior of allocating frame buffers internally. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb_get Pointer to the get callback function - * \param[in] cb_release Pointer to the release callback function - * \param[in] cb_priv Callback's private data - * - * \retval #VPX_CODEC_OK - * External frame buffers will be used by libvpx. - * \retval #VPX_CODEC_INVALID_PARAM - * One or more of the callbacks were NULL. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * using external frame buffers. - * - * \note - * When decoding VP9, the application may be required to pass in at least - * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame - * buffers. - */ -typedef vpx_codec_err_t (*vpx_codec_set_fb_fn_t)( - vpx_codec_alg_priv_t *ctx, - vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); - - -typedef vpx_codec_err_t (*vpx_codec_encode_fn_t)(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline); -typedef const vpx_codec_cx_pkt_t *(*vpx_codec_get_cx_data_fn_t)(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter); - -typedef vpx_codec_err_t -(*vpx_codec_enc_config_set_fn_t)(vpx_codec_alg_priv_t *ctx, - const vpx_codec_enc_cfg_t *cfg); -typedef vpx_fixed_buf_t * -(*vpx_codec_get_global_headers_fn_t)(vpx_codec_alg_priv_t *ctx); - -typedef vpx_image_t * -(*vpx_codec_get_preview_frame_fn_t)(vpx_codec_alg_priv_t *ctx); - -typedef vpx_codec_err_t -(*vpx_codec_enc_mr_get_mem_loc_fn_t)(const vpx_codec_enc_cfg_t *cfg, - void **mem_loc); - -/*!\brief usage configuration mapping - * - * This structure stores the mapping between usage identifiers and - * configuration structures. Each algorithm provides a list of these - * mappings. This list is searched by the vpx_codec_enc_config_default() - * wrapper function to determine which config to return. The special value - * {-1, {0}} is used to indicate end-of-list, and must be present. At least - * one mapping must be present, in addition to the end-of-list. - * - */ -typedef const struct vpx_codec_enc_cfg_map { - int usage; - vpx_codec_enc_cfg_t cfg; -} vpx_codec_enc_cfg_map_t; - -/*!\brief Decoder algorithm interface interface - * - * All decoders \ref MUST expose a variable of this type. - */ -struct vpx_codec_iface { - const char *name; /**< Identification String */ - int abi_version; /**< Implemented ABI version */ - vpx_codec_caps_t caps; /**< Decoder capabilities */ - vpx_codec_init_fn_t init; /**< \copydoc ::vpx_codec_init_fn_t */ - vpx_codec_destroy_fn_t destroy; /**< \copydoc ::vpx_codec_destroy_fn_t */ - vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */ - struct vpx_codec_dec_iface { - vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */ - vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_get_si_fn_t */ - vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */ - vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */ - vpx_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::vpx_codec_set_fb_fn_t */ - } dec; - struct vpx_codec_enc_iface { - int cfg_map_count; - vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */ - vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */ - vpx_codec_get_cx_data_fn_t get_cx_data; /**< \copydoc ::vpx_codec_get_cx_data_fn_t */ - vpx_codec_enc_config_set_fn_t cfg_set; /**< \copydoc ::vpx_codec_enc_config_set_fn_t */ - vpx_codec_get_global_headers_fn_t get_glob_hdrs; /**< \copydoc ::vpx_codec_get_global_headers_fn_t */ - vpx_codec_get_preview_frame_fn_t get_preview; /**< \copydoc ::vpx_codec_get_preview_frame_fn_t */ - vpx_codec_enc_mr_get_mem_loc_fn_t mr_get_mem_loc; /**< \copydoc ::vpx_codec_enc_mr_get_mem_loc_fn_t */ - } enc; -}; - -/*!\brief Callback function pointer / user data pair storage */ -typedef struct vpx_codec_priv_cb_pair { - union { - vpx_codec_put_frame_cb_fn_t put_frame; - vpx_codec_put_slice_cb_fn_t put_slice; - } u; - void *user_priv; -} vpx_codec_priv_cb_pair_t; - - -/*!\brief Instance private storage - * - * This structure is allocated by the algorithm's init function. It can be - * extended in one of two ways. First, a second, algorithm specific structure - * can be allocated and the priv member pointed to it. Alternatively, this - * structure can be made the first member of the algorithm specific structure, - * and the pointer cast to the proper type. - */ -struct vpx_codec_priv { - const char *err_detail; - vpx_codec_flags_t init_flags; - struct { - vpx_codec_priv_cb_pair_t put_frame_cb; - vpx_codec_priv_cb_pair_t put_slice_cb; - } dec; - struct { - vpx_fixed_buf_t cx_data_dst_buf; - unsigned int cx_data_pad_before; - unsigned int cx_data_pad_after; - vpx_codec_cx_pkt_t cx_data_pkt; - unsigned int total_encoders; - } enc; -}; - -/* - * Multi-resolution encoding internal configuration - */ -struct vpx_codec_priv_enc_mr_cfg -{ - unsigned int mr_total_resolutions; - unsigned int mr_encoder_id; - struct vpx_rational mr_down_sampling_factor; - void* mr_low_res_mode_info; -}; - -#undef VPX_CTRL_USE_TYPE -#define VPX_CTRL_USE_TYPE(id, typ) \ - static VPX_INLINE typ id##__value(va_list args) {return va_arg(args, typ);} - -#undef VPX_CTRL_USE_TYPE_DEPRECATED -#define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ - static VPX_INLINE typ id##__value(va_list args) {return va_arg(args, typ);} - -#define CAST(id, arg) id##__value(arg) - -/* CODEC_INTERFACE convenience macro - * - * By convention, each codec interface is a struct with extern linkage, where - * the symbol is suffixed with _algo. A getter function is also defined to - * return a pointer to the struct, since in some cases it's easier to work - * with text symbols than data symbols (see issue #169). This function has - * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE - * macro is provided to define this getter function automatically. - */ -#define CODEC_INTERFACE(id)\ - vpx_codec_iface_t* id(void) { return &id##_algo; }\ - vpx_codec_iface_t id##_algo - - -/* Internal Utility Functions - * - * The following functions are intended to be used inside algorithms as - * utilities for manipulating vpx_codec_* data structures. - */ -struct vpx_codec_pkt_list { - unsigned int cnt; - unsigned int max; - struct vpx_codec_cx_pkt pkts[1]; -}; - -#define vpx_codec_pkt_list_decl(n)\ - union {struct vpx_codec_pkt_list head;\ - struct {struct vpx_codec_pkt_list head;\ - struct vpx_codec_cx_pkt pkts[n];} alloc;} - -#define vpx_codec_pkt_list_init(m)\ - (m)->alloc.head.cnt = 0,\ - (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0]) - -int -vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *, - const struct vpx_codec_cx_pkt *); - -const vpx_codec_cx_pkt_t * -vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list, - vpx_codec_iter_t *iter); - - -#include -#include - -struct vpx_internal_error_info { - vpx_codec_err_t error_code; - int has_detail; - char detail[80]; - int setjmp; - jmp_buf jmp; -}; - -#define CLANG_ANALYZER_NORETURN -#if defined(__has_feature) -#if __has_feature(attribute_analyzer_noreturn) -#undef CLANG_ANALYZER_NORETURN -#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn)) -#endif -#endif - -void vpx_internal_error(struct vpx_internal_error_info *info, - vpx_codec_err_t error, - const char *fmt, - ...) CLANG_ANALYZER_NORETURN; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ diff --git a/thirdparty/libvpx/vpx/internal/vpx_psnr.h b/thirdparty/libvpx/vpx/internal/vpx_psnr.h deleted file mode 100644 index 07d81bb8d9..0000000000 --- a/thirdparty/libvpx/vpx/internal/vpx_psnr.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_INTERNAL_VPX_PSNR_H_ -#define VPX_INTERNAL_VPX_PSNR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t - -/*!\brief Converts SSE to PSNR - * - * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR). - * - * \param[in] samples Number of samples - * \param[in] peak Max sample value - * \param[in] sse Sum of squared errors - */ -double vpx_sse_to_psnr(double samples, double peak, double sse); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_INTERNAL_VPX_PSNR_H_ diff --git a/thirdparty/libvpx/vpx/src/vpx_codec.c b/thirdparty/libvpx/vpx/src/vpx_codec.c deleted file mode 100644 index 5a495ce814..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_codec.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Provides the high level interface to wrap decoder algorithms. - * - */ -#include -#include -#include "vpx/vpx_integer.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx_version.h" - -#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var) - -int vpx_codec_version(void) { - return VERSION_PACKED; -} - - -const char *vpx_codec_version_str(void) { - return VERSION_STRING_NOSP; -} - - -const char *vpx_codec_version_extra_str(void) { - return VERSION_EXTRA; -} - - -const char *vpx_codec_iface_name(vpx_codec_iface_t *iface) { - return iface ? iface->name : ""; -} - -const char *vpx_codec_err_to_string(vpx_codec_err_t err) { - switch (err) { - case VPX_CODEC_OK: - return "Success"; - case VPX_CODEC_ERROR: - return "Unspecified internal error"; - case VPX_CODEC_MEM_ERROR: - return "Memory allocation error"; - case VPX_CODEC_ABI_MISMATCH: - return "ABI version mismatch"; - case VPX_CODEC_INCAPABLE: - return "Codec does not implement requested capability"; - case VPX_CODEC_UNSUP_BITSTREAM: - return "Bitstream not supported by this decoder"; - case VPX_CODEC_UNSUP_FEATURE: - return "Bitstream required feature not supported by this decoder"; - case VPX_CODEC_CORRUPT_FRAME: - return "Corrupt frame detected"; - case VPX_CODEC_INVALID_PARAM: - return "Invalid parameter"; - case VPX_CODEC_LIST_END: - return "End of iterated list"; - } - - return "Unrecognized error code"; -} - -const char *vpx_codec_error(vpx_codec_ctx_t *ctx) { - return (ctx) ? vpx_codec_err_to_string(ctx->err) - : vpx_codec_err_to_string(VPX_CODEC_INVALID_PARAM); -} - -const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx) { - if (ctx && ctx->err) - return ctx->priv ? ctx->priv->err_detail : ctx->err_detail; - - return NULL; -} - - -vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx) { - vpx_codec_err_t res; - - if (!ctx) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv) - res = VPX_CODEC_ERROR; - else { - ctx->iface->destroy((vpx_codec_alg_priv_t *)ctx->priv); - - ctx->iface = NULL; - ctx->name = NULL; - ctx->priv = NULL; - res = VPX_CODEC_OK; - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface) { - return (iface) ? iface->caps : 0; -} - - -vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, - int ctrl_id, - ...) { - vpx_codec_err_t res; - - if (!ctx || !ctrl_id) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps) - res = VPX_CODEC_ERROR; - else { - vpx_codec_ctrl_fn_map_t *entry; - - res = VPX_CODEC_ERROR; - - for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) { - if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) { - va_list ap; - - va_start(ap, ctrl_id); - res = entry->fn((vpx_codec_alg_priv_t *)ctx->priv, ap); - va_end(ap); - break; - } - } - } - - return SAVE_STATUS(ctx, res); -} - -void vpx_internal_error(struct vpx_internal_error_info *info, - vpx_codec_err_t error, - const char *fmt, - ...) { - va_list ap; - - info->error_code = error; - info->has_detail = 0; - - if (fmt) { - size_t sz = sizeof(info->detail); - - info->has_detail = 1; - va_start(ap, fmt); - vsnprintf(info->detail, sz - 1, fmt, ap); - va_end(ap); - info->detail[sz - 1] = '\0'; - } - - if (info->setjmp) - longjmp(info->jmp, info->error_code); -} diff --git a/thirdparty/libvpx/vpx/src/vpx_decoder.c b/thirdparty/libvpx/vpx/src/vpx_decoder.c deleted file mode 100644 index 802d8edd8a..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_decoder.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Provides the high level interface to wrap decoder algorithms. - * - */ -#include -#include "vpx/internal/vpx_codec_internal.h" - -#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var) - -static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) { - return (vpx_codec_alg_priv_t *)ctx->priv; -} - -vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - const vpx_codec_dec_cfg_t *cfg, - vpx_codec_flags_t flags, - int ver) { - vpx_codec_err_t res; - - if (ver != VPX_DECODER_ABI_VERSION) - res = VPX_CODEC_ABI_MISMATCH; - else if (!ctx || !iface) - res = VPX_CODEC_INVALID_PARAM; - else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION) - res = VPX_CODEC_ABI_MISMATCH; - else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC)) - res = VPX_CODEC_INCAPABLE; - else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) && - !(iface->caps & VPX_CODEC_CAP_ERROR_CONCEALMENT)) - res = VPX_CODEC_INCAPABLE; - else if ((flags & VPX_CODEC_USE_INPUT_FRAGMENTS) && - !(iface->caps & VPX_CODEC_CAP_INPUT_FRAGMENTS)) - res = VPX_CODEC_INCAPABLE; - else if (!(iface->caps & VPX_CODEC_CAP_DECODER)) - res = VPX_CODEC_INCAPABLE; - else { - memset(ctx, 0, sizeof(*ctx)); - ctx->iface = iface; - ctx->name = iface->name; - ctx->priv = NULL; - ctx->init_flags = flags; - ctx->config.dec = cfg; - - res = ctx->iface->init(ctx, NULL); - if (res) { - ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; - vpx_codec_destroy(ctx); - } - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, - const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) { - vpx_codec_err_t res; - - if (!iface || !data || !data_sz || !si - || si->sz < sizeof(vpx_codec_stream_info_t)) - res = VPX_CODEC_INVALID_PARAM; - else { - /* Set default/unknown values */ - si->w = 0; - si->h = 0; - - res = iface->dec.peek_si(data, data_sz, si); - } - - return res; -} - - -vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, - vpx_codec_stream_info_t *si) { - vpx_codec_err_t res; - - if (!ctx || !si || si->sz < sizeof(vpx_codec_stream_info_t)) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv) - res = VPX_CODEC_ERROR; - else { - /* Set default/unknown values */ - si->w = 0; - si->h = 0; - - res = ctx->iface->dec.get_si(get_alg_priv(ctx), si); - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) { - vpx_codec_err_t res; - - /* Sanity checks */ - /* NULL data ptr allowed if data_sz is 0 too */ - if (!ctx || (!data && data_sz) || (data && !data_sz)) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv) - res = VPX_CODEC_ERROR; - else { - res = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv, - deadline); - } - - return SAVE_STATUS(ctx, res); -} - -vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter) { - vpx_image_t *img; - - if (!ctx || !iter || !ctx->iface || !ctx->priv) - img = NULL; - else - img = ctx->iface->dec.get_frame(get_alg_priv(ctx), iter); - - return img; -} - - -vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_frame_cb_fn_t cb, - void *user_priv) { - vpx_codec_err_t res; - - if (!ctx || !cb) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv - || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME)) - res = VPX_CODEC_ERROR; - else { - ctx->priv->dec.put_frame_cb.u.put_frame = cb; - ctx->priv->dec.put_frame_cb.user_priv = user_priv; - res = VPX_CODEC_OK; - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_slice_cb_fn_t cb, - void *user_priv) { - vpx_codec_err_t res; - - if (!ctx || !cb) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv - || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE)) - res = VPX_CODEC_ERROR; - else { - ctx->priv->dec.put_slice_cb.u.put_slice = cb; - ctx->priv->dec.put_slice_cb.user_priv = user_priv; - res = VPX_CODEC_OK; - } - - return SAVE_STATUS(ctx, res); -} - -vpx_codec_err_t vpx_codec_set_frame_buffer_functions( - vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { - vpx_codec_err_t res; - - if (!ctx || !cb_get || !cb_release) { - res = VPX_CODEC_INVALID_PARAM; - } else if (!ctx->iface || !ctx->priv || - !(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) { - res = VPX_CODEC_ERROR; - } else { - res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release, - cb_priv); - } - - return SAVE_STATUS(ctx, res); -} diff --git a/thirdparty/libvpx/vpx/src/vpx_image.c b/thirdparty/libvpx/vpx/src/vpx_image.c deleted file mode 100644 index 9aae12c794..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_image.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "vpx/vpx_image.h" -#include "vpx/vpx_integer.h" -#include "vpx_mem/vpx_mem.h" - -static vpx_image_t *img_alloc_helper(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int buf_align, - unsigned int stride_align, - unsigned char *img_data) { - unsigned int h, w, s, xcs, ycs, bps; - unsigned int stride_in_bytes; - int align; - - /* Treat align==0 like align==1 */ - if (!buf_align) - buf_align = 1; - - /* Validate alignment (must be power of 2) */ - if (buf_align & (buf_align - 1)) - goto fail; - - /* Treat align==0 like align==1 */ - if (!stride_align) - stride_align = 1; - - /* Validate alignment (must be power of 2) */ - if (stride_align & (stride_align - 1)) - goto fail; - - /* Get sample size for this format */ - switch (fmt) { - case VPX_IMG_FMT_RGB32: - case VPX_IMG_FMT_RGB32_LE: - case VPX_IMG_FMT_ARGB: - case VPX_IMG_FMT_ARGB_LE: - bps = 32; - break; - case VPX_IMG_FMT_RGB24: - case VPX_IMG_FMT_BGR24: - bps = 24; - break; - case VPX_IMG_FMT_RGB565: - case VPX_IMG_FMT_RGB565_LE: - case VPX_IMG_FMT_RGB555: - case VPX_IMG_FMT_RGB555_LE: - case VPX_IMG_FMT_UYVY: - case VPX_IMG_FMT_YUY2: - case VPX_IMG_FMT_YVYU: - bps = 16; - break; - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: - bps = 12; - break; - case VPX_IMG_FMT_I422: - case VPX_IMG_FMT_I440: - bps = 16; - break; - case VPX_IMG_FMT_I444: - bps = 24; - break; - case VPX_IMG_FMT_I42016: - bps = 24; - break; - case VPX_IMG_FMT_I42216: - case VPX_IMG_FMT_I44016: - bps = 32; - break; - case VPX_IMG_FMT_I44416: - bps = 48; - break; - default: - bps = 16; - break; - } - - /* Get chroma shift values for this format */ - switch (fmt) { - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: - case VPX_IMG_FMT_I422: - case VPX_IMG_FMT_I42016: - case VPX_IMG_FMT_I42216: - xcs = 1; - break; - default: - xcs = 0; - break; - } - - switch (fmt) { - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_I440: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: - case VPX_IMG_FMT_I42016: - case VPX_IMG_FMT_I44016: - ycs = 1; - break; - default: - ycs = 0; - break; - } - - /* Calculate storage sizes given the chroma subsampling */ - align = (1 << xcs) - 1; - w = (d_w + align) & ~align; - align = (1 << ycs) - 1; - h = (d_h + align) & ~align; - s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8; - s = (s + stride_align - 1) & ~(stride_align - 1); - stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s; - - /* Allocate the new image */ - if (!img) { - img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t)); - - if (!img) - goto fail; - - img->self_allocd = 1; - } else { - memset(img, 0, sizeof(vpx_image_t)); - } - - img->img_data = img_data; - - if (!img_data) { - const uint64_t alloc_size = (fmt & VPX_IMG_FMT_PLANAR) ? - (uint64_t)h * s * bps / 8 : (uint64_t)h * s; - - if (alloc_size != (size_t)alloc_size) - goto fail; - - img->img_data = (uint8_t *)vpx_memalign(buf_align, (size_t)alloc_size); - img->img_data_owner = 1; - } - - if (!img->img_data) - goto fail; - - img->fmt = fmt; - img->bit_depth = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 16 : 8; - img->w = w; - img->h = h; - img->x_chroma_shift = xcs; - img->y_chroma_shift = ycs; - img->bps = bps; - - /* Calculate strides */ - img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes; - img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs; - - /* Default viewport to entire image */ - if (!vpx_img_set_rect(img, 0, 0, d_w, d_h)) - return img; - -fail: - vpx_img_free(img); - return NULL; -} - -vpx_image_t *vpx_img_alloc(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int align) { - return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL); -} - -vpx_image_t *vpx_img_wrap(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int stride_align, - unsigned char *img_data) { - /* By setting buf_align = 1, we don't change buffer alignment in this - * function. */ - return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data); -} - -int vpx_img_set_rect(vpx_image_t *img, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h) { - unsigned char *data; - - if (x + w <= img->w && y + h <= img->h) { - img->d_w = w; - img->d_h = h; - - /* Calculate plane pointers */ - if (!(img->fmt & VPX_IMG_FMT_PLANAR)) { - img->planes[VPX_PLANE_PACKED] = - img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED]; - } else { - const int bytes_per_sample = - (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; - data = img->img_data; - - if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) { - img->planes[VPX_PLANE_ALPHA] = - data + x * bytes_per_sample + y * img->stride[VPX_PLANE_ALPHA]; - data += img->h * img->stride[VPX_PLANE_ALPHA]; - } - - img->planes[VPX_PLANE_Y] = data + x * bytes_per_sample + - y * img->stride[VPX_PLANE_Y]; - data += img->h * img->stride[VPX_PLANE_Y]; - - if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) { - img->planes[VPX_PLANE_U] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; - data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; - img->planes[VPX_PLANE_V] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; - } else { - img->planes[VPX_PLANE_V] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; - data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; - img->planes[VPX_PLANE_U] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; - } - } - return 0; - } - return -1; -} - -void vpx_img_flip(vpx_image_t *img) { - /* Note: In the calculation pointer adjustment calculation, we want the - * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99 - * standard indicates that if the adjustment parameter is unsigned, the - * stride parameter will be promoted to unsigned, causing errors when - * the lhs is a larger type than the rhs. - */ - img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y]; - img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y]; - - img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1) - * img->stride[VPX_PLANE_U]; - img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U]; - - img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1) - * img->stride[VPX_PLANE_V]; - img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V]; - - img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA]; - img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA]; -} - -void vpx_img_free(vpx_image_t *img) { - if (img) { - if (img->img_data && img->img_data_owner) - vpx_free(img->img_data); - - if (img->self_allocd) - free(img); - } -} diff --git a/thirdparty/libvpx/vpx/src/vpx_psnr.c b/thirdparty/libvpx/vpx/src/vpx_psnr.c deleted file mode 100644 index 05843acb61..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_psnr.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx/internal/vpx_psnr.h" - -#define MAX_PSNR 100.0 - -double vpx_sse_to_psnr(double samples, double peak, double sse) { - if (sse > 0.0) { - const double psnr = 10.0 * log10(samples * peak * peak / sse); - return psnr > MAX_PSNR ? MAX_PSNR : psnr; - } else { - return MAX_PSNR; - } -} diff --git a/thirdparty/libvpx/vpx/vp8.h b/thirdparty/libvpx/vpx/vp8.h deleted file mode 100644 index 8a035f9770..0000000000 --- a/thirdparty/libvpx/vpx/vp8.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/*!\defgroup vp8 VP8 - * \ingroup codecs - * VP8 is vpx's newest video compression algorithm that uses motion - * compensated prediction, Discrete Cosine Transform (DCT) coding of the - * prediction error signal and context dependent entropy coding techniques - * based on arithmetic principles. It features: - * - YUV 4:2:0 image format - * - Macro-block based coding (16x16 luma plus two 8x8 chroma) - * - 1/4 (1/8) pixel accuracy motion compensated prediction - * - 4x4 DCT transform - * - 128 level linear quantizer - * - In loop deblocking filter - * - Context-based entropy coding - * - * @{ - */ -/*!\file - * \brief Provides controls common to both the VP8 encoder and decoder. - */ -#ifndef VPX_VP8_H_ -#define VPX_VP8_H_ - -#include "./vpx_codec.h" -#include "./vpx_image.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*!\brief Control functions - * - * The set of macros define the control functions of VP8 interface - */ -enum vp8_com_control_id { - VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ - VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ - VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ - VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */ - VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */ - VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */ - VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */ - - /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+) - * for its control ids. These should be migrated to something like the - * VP8_DECODER_CTRL_ID_START range next time we're ready to break the ABI. - */ - VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */ - VP8_COMMON_CTRL_ID_MAX, - VP8_DECODER_CTRL_ID_START = 256 -}; - -/*!\brief post process flags - * - * The set of macros define VP8 decoder post processing flags - */ -enum vp8_postproc_level { - VP8_NOFILTERING = 0, - VP8_DEBLOCK = 1 << 0, - VP8_DEMACROBLOCK = 1 << 1, - VP8_ADDNOISE = 1 << 2, - VP8_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */ - VP8_DEBUG_TXT_MBLK_MODES = 1 << 4, /**< print macro block modes over each macro block */ - VP8_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */ - VP8_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */ - VP8_MFQE = 1 << 10 -}; - -/*!\brief post process flags - * - * This define a structure that describe the post processing settings. For - * the best objective measure (using the PSNR metric) set post_proc_flag - * to VP8_DEBLOCK and deblocking_level to 1. - */ - -typedef struct vp8_postproc_cfg { - int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ - int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ - int noise_level; /**< the strength of additive noise, valid range [0, 16] */ -} vp8_postproc_cfg_t; - -/*!\brief reference frame type - * - * The set of macros define the type of VP8 reference frames - */ -typedef enum vpx_ref_frame_type { - VP8_LAST_FRAME = 1, - VP8_GOLD_FRAME = 2, - VP8_ALTR_FRAME = 4 -} vpx_ref_frame_type_t; - -/*!\brief reference frame data struct - * - * Define the data struct to access vp8 reference frames. - */ -typedef struct vpx_ref_frame { - vpx_ref_frame_type_t frame_type; /**< which reference frame */ - vpx_image_t img; /**< reference frame data in image format */ -} vpx_ref_frame_t; - -/*!\brief VP9 specific reference frame data struct - * - * Define the data struct to access vp9 reference frames. - */ -typedef struct vp9_ref_frame { - int idx; /**< frame index to get (input) */ - vpx_image_t img; /**< img structure to populate (output) */ -} vp9_ref_frame_t; - -/*!\cond */ -/*!\brief vp8 decoder control function parameter type - * - * defines the data type for each of VP8 decoder control function requires - */ -VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) -#define VPX_CTRL_VP8_SET_REFERENCE -VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) -#define VPX_CTRL_VP8_COPY_REFERENCE -VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) -#define VPX_CTRL_VP8_SET_POSTPROC -VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_REF_FRAME -VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_MB_MODES -VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_B_MODES -VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) -#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV -VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) -#define VPX_CTRL_VP9_GET_REFERENCE - -/*!\endcond */ -/*! @} - end defgroup vp8 */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VP8_H_ diff --git a/thirdparty/libvpx/vpx/vp8dx.h b/thirdparty/libvpx/vpx/vp8dx.h deleted file mode 100644 index 67c97bb6c9..0000000000 --- a/thirdparty/libvpx/vpx/vp8dx.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\defgroup vp8_decoder WebM VP8/VP9 Decoder - * \ingroup vp8 - * - * @{ - */ -/*!\file - * \brief Provides definitions for using VP8 or VP9 within the vpx Decoder - * interface. - */ -#ifndef VPX_VP8DX_H_ -#define VPX_VP8DX_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Include controls common to both the encoder and decoder */ -#include "./vp8.h" - -/*!\name Algorithm interface for VP8 - * - * This interface provides the capability to decode VP8 streams. - * @{ - */ -extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; -extern vpx_codec_iface_t *vpx_codec_vp8_dx(void); -/*!@} - end algorithm interface member group*/ - -/*!\name Algorithm interface for VP9 - * - * This interface provides the capability to decode VP9 streams. - * @{ - */ -extern vpx_codec_iface_t vpx_codec_vp9_dx_algo; -extern vpx_codec_iface_t *vpx_codec_vp9_dx(void); -/*!@} - end algorithm interface member group*/ - -/*!\enum vp8_dec_control_id - * \brief VP8 decoder control functions - * - * This set of macros define the control functions available for the VP8 - * decoder interface. - * - * \sa #vpx_codec_control - */ -enum vp8_dec_control_id { - /** control function to get info on which reference frames were updated - * by the last decode - */ - VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START, - - /** check if the indicated frame is corrupted */ - VP8D_GET_FRAME_CORRUPTED, - - /** control function to get info on which reference frames were used - * by the last decode - */ - VP8D_GET_LAST_REF_USED, - - /** decryption function to decrypt encoded buffer data immediately - * before decoding. Takes a vpx_decrypt_init, which contains - * a callback function and opaque context pointer. - */ - VPXD_SET_DECRYPTOR, - VP8D_SET_DECRYPTOR = VPXD_SET_DECRYPTOR, - - /** control function to get the dimensions that the current frame is decoded - * at. This may be different to the intended display size for the frame as - * specified in the wrapper or frame header (see VP9D_GET_DISPLAY_SIZE). */ - VP9D_GET_FRAME_SIZE, - - /** control function to get the current frame's intended display dimensions - * (as specified in the wrapper or frame header). This may be different to - * the decoded dimensions of this frame (see VP9D_GET_FRAME_SIZE). */ - VP9D_GET_DISPLAY_SIZE, - - /** control function to get the bit depth of the stream. */ - VP9D_GET_BIT_DEPTH, - - /** control function to set the byte alignment of the planes in the reference - * buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets - * legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly - * follows Y plane, and V plane directly follows U plane. Default value is 0. - */ - VP9_SET_BYTE_ALIGNMENT, - - /** control function to invert the decoding order to from right to left. The - * function is used in a test to confirm the decoding independence of tile - * columns. The function may be used in application where this order - * of decoding is desired. - * - * TODO(yaowu): Rework the unit test that uses this control, and in a future - * release, this test-only control shall be removed. - */ - VP9_INVERT_TILE_DECODE_ORDER, - - /** control function to set the skip loop filter flag. Valid values are - * integers. The decoder will skip the loop filter when its value is set to - * nonzero. If the loop filter is skipped the decoder may accumulate decode - * artifacts. The default value is 0. - */ - VP9_SET_SKIP_LOOP_FILTER, - - VP8_DECODER_CTRL_ID_MAX -}; - -/** Decrypt n bytes of data from input -> output, using the decrypt_state - * passed in VPXD_SET_DECRYPTOR. - */ -typedef void (*vpx_decrypt_cb)(void *decrypt_state, const unsigned char *input, - unsigned char *output, int count); - -/*!\brief Structure to hold decryption state - * - * Defines a structure to hold the decryption state and access function. - */ -typedef struct vpx_decrypt_init { - /*! Decrypt callback. */ - vpx_decrypt_cb decrypt_cb; - - /*! Decryption state. */ - void *decrypt_state; -} vpx_decrypt_init; - -/*!\brief A deprecated alias for vpx_decrypt_init. - */ -typedef vpx_decrypt_init vp8_decrypt_init; - - -/*!\cond */ -/*!\brief VP8 decoder control function parameter type - * - * Defines the data types that VP8D control functions take. Note that - * additional common controls are defined in vp8.h - * - */ - - -VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) -#define VPX_CTRL_VP8D_GET_LAST_REF_UPDATES -VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) -#define VPX_CTRL_VP8D_GET_FRAME_CORRUPTED -VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *) -#define VPX_CTRL_VP8D_GET_LAST_REF_USED -VPX_CTRL_USE_TYPE(VPXD_SET_DECRYPTOR, vpx_decrypt_init *) -#define VPX_CTRL_VPXD_SET_DECRYPTOR -VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR, vpx_decrypt_init *) -#define VPX_CTRL_VP8D_SET_DECRYPTOR -VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE, int *) -#define VPX_CTRL_VP9D_GET_DISPLAY_SIZE -VPX_CTRL_USE_TYPE(VP9D_GET_BIT_DEPTH, unsigned int *) -#define VPX_CTRL_VP9D_GET_BIT_DEPTH -VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *) -#define VPX_CTRL_VP9D_GET_FRAME_SIZE -VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int) -#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER - -/*!\endcond */ -/*! @} - end defgroup vp8_decoder */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VP8DX_H_ diff --git a/thirdparty/libvpx/vpx/vpx_codec.h b/thirdparty/libvpx/vpx/vpx_codec.h deleted file mode 100644 index b6037bb4d7..0000000000 --- a/thirdparty/libvpx/vpx/vpx_codec.h +++ /dev/null @@ -1,479 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\defgroup codec Common Algorithm Interface - * This abstraction allows applications to easily support multiple video - * formats with minimal code duplication. This section describes the interface - * common to all codecs (both encoders and decoders). - * @{ - */ - -/*!\file - * \brief Describes the codec algorithm interface to applications. - * - * This file describes the interface between an application and a - * video codec algorithm. - * - * An application instantiates a specific codec instance by using - * vpx_codec_init() and a pointer to the algorithm's interface structure: - *
- *     my_app.c:
- *       extern vpx_codec_iface_t my_codec;
- *       {
- *           vpx_codec_ctx_t algo;
- *           res = vpx_codec_init(&algo, &my_codec);
- *       }
- *     
- * - * Once initialized, the instance is manged using other functions from - * the vpx_codec_* family. - */ -#ifndef VPX_VPX_CODEC_H_ -#define VPX_VPX_CODEC_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_integer.h" -#include "./vpx_image.h" - - /*!\brief Decorator indicating a function is deprecated */ -#ifndef DEPRECATED -#if defined(__GNUC__) && __GNUC__ -#define DEPRECATED __attribute__ ((deprecated)) -#elif defined(_MSC_VER) -#define DEPRECATED -#else -#define DEPRECATED -#endif -#endif /* DEPRECATED */ - -#ifndef DECLSPEC_DEPRECATED -#if defined(__GNUC__) && __GNUC__ -#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ -#elif defined(_MSC_VER) -#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ -#else -#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ -#endif -#endif /* DECLSPEC_DEPRECATED */ - - /*!\brief Decorator indicating a function is potentially unused */ -#ifdef UNUSED -#elif defined(__GNUC__) || defined(__clang__) -#define UNUSED __attribute__ ((unused)) -#else -#define UNUSED -#endif - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_CODEC_ABI_VERSION (3 + VPX_IMAGE_ABI_VERSION) /**<\hideinitializer*/ - - /*!\brief Algorithm return codes */ - typedef enum { - /*!\brief Operation completed without error */ - VPX_CODEC_OK, - - /*!\brief Unspecified error */ - VPX_CODEC_ERROR, - - /*!\brief Memory operation failed */ - VPX_CODEC_MEM_ERROR, - - /*!\brief ABI version mismatch */ - VPX_CODEC_ABI_MISMATCH, - - /*!\brief Algorithm does not have required capability */ - VPX_CODEC_INCAPABLE, - - /*!\brief The given bitstream is not supported. - * - * The bitstream was unable to be parsed at the highest level. The decoder - * is unable to proceed. This error \ref SHOULD be treated as fatal to the - * stream. */ - VPX_CODEC_UNSUP_BITSTREAM, - - /*!\brief Encoded bitstream uses an unsupported feature - * - * The decoder does not implement a feature required by the encoder. This - * return code should only be used for features that prevent future - * pictures from being properly decoded. This error \ref MAY be treated as - * fatal to the stream or \ref MAY be treated as fatal to the current GOP. - */ - VPX_CODEC_UNSUP_FEATURE, - - /*!\brief The coded data for this stream is corrupt or incomplete - * - * There was a problem decoding the current frame. This return code - * should only be used for failures that prevent future pictures from - * being properly decoded. This error \ref MAY be treated as fatal to the - * stream or \ref MAY be treated as fatal to the current GOP. If decoding - * is continued for the current GOP, artifacts may be present. - */ - VPX_CODEC_CORRUPT_FRAME, - - /*!\brief An application-supplied parameter is not valid. - * - */ - VPX_CODEC_INVALID_PARAM, - - /*!\brief An iterator reached the end of list. - * - */ - VPX_CODEC_LIST_END - - } - vpx_codec_err_t; - - - /*! \brief Codec capabilities bitfield - * - * Each codec advertises the capabilities it supports as part of its - * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces - * or functionality, and are not required to be supported. - * - * The available flags are specified by VPX_CODEC_CAP_* defines. - */ - typedef long vpx_codec_caps_t; -#define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */ -#define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */ - - - /*! \brief Initialization-time Feature Enabling - * - * Certain codec features must be known at initialization time, to allow for - * proper memory allocation. - * - * The available flags are specified by VPX_CODEC_USE_* defines. - */ - typedef long vpx_codec_flags_t; - - - /*!\brief Codec interface structure. - * - * Contains function pointers and other data private to the codec - * implementation. This structure is opaque to the application. - */ - typedef const struct vpx_codec_iface vpx_codec_iface_t; - - - /*!\brief Codec private data structure. - * - * Contains data private to the codec implementation. This structure is opaque - * to the application. - */ - typedef struct vpx_codec_priv vpx_codec_priv_t; - - - /*!\brief Iterator - * - * Opaque storage used for iterating over lists. - */ - typedef const void *vpx_codec_iter_t; - - - /*!\brief Codec context structure - * - * All codecs \ref MUST support this context structure fully. In general, - * this data should be considered private to the codec algorithm, and - * not be manipulated or examined by the calling application. Applications - * may reference the 'name' member to get a printable description of the - * algorithm. - */ - typedef struct vpx_codec_ctx { - const char *name; /**< Printable interface name */ - vpx_codec_iface_t *iface; /**< Interface pointers */ - vpx_codec_err_t err; /**< Last returned error */ - const char *err_detail; /**< Detailed info, if available */ - vpx_codec_flags_t init_flags; /**< Flags passed at init time */ - union { - /**< Decoder Configuration Pointer */ - const struct vpx_codec_dec_cfg *dec; - /**< Encoder Configuration Pointer */ - const struct vpx_codec_enc_cfg *enc; - const void *raw; - } config; /**< Configuration pointer aliasing union */ - vpx_codec_priv_t *priv; /**< Algorithm private storage */ - } vpx_codec_ctx_t; - - /*!\brief Bit depth for codec - * * - * This enumeration determines the bit depth of the codec. - */ - typedef enum vpx_bit_depth { - VPX_BITS_8 = 8, /**< 8 bits */ - VPX_BITS_10 = 10, /**< 10 bits */ - VPX_BITS_12 = 12, /**< 12 bits */ - } vpx_bit_depth_t; - - /* - * Library Version Number Interface - * - * For example, see the following sample return values: - * vpx_codec_version() (1<<16 | 2<<8 | 3) - * vpx_codec_version_str() "v1.2.3-rc1-16-gec6a1ba" - * vpx_codec_version_extra_str() "rc1-16-gec6a1ba" - */ - - /*!\brief Return the version information (as an integer) - * - * Returns a packed encoding of the library version number. This will only include - * the major.minor.patch component of the version number. Note that this encoded - * value should be accessed through the macros provided, as the encoding may change - * in the future. - * - */ - int vpx_codec_version(void); -#define VPX_VERSION_MAJOR(v) ((v>>16)&0xff) /**< extract major from packed version */ -#define VPX_VERSION_MINOR(v) ((v>>8)&0xff) /**< extract minor from packed version */ -#define VPX_VERSION_PATCH(v) ((v>>0)&0xff) /**< extract patch from packed version */ - - /*!\brief Return the version major number */ -#define vpx_codec_version_major() ((vpx_codec_version()>>16)&0xff) - - /*!\brief Return the version minor number */ -#define vpx_codec_version_minor() ((vpx_codec_version()>>8)&0xff) - - /*!\brief Return the version patch number */ -#define vpx_codec_version_patch() ((vpx_codec_version()>>0)&0xff) - - - /*!\brief Return the version information (as a string) - * - * Returns a printable string containing the full library version number. This may - * contain additional text following the three digit version number, as to indicate - * release candidates, prerelease versions, etc. - * - */ - const char *vpx_codec_version_str(void); - - - /*!\brief Return the version information (as a string) - * - * Returns a printable "extra string". This is the component of the string returned - * by vpx_codec_version_str() following the three digit version number. - * - */ - const char *vpx_codec_version_extra_str(void); - - - /*!\brief Return the build configuration - * - * Returns a printable string containing an encoded version of the build - * configuration. This may be useful to vpx support. - * - */ - const char *vpx_codec_build_config(void); - - - /*!\brief Return the name for a given interface - * - * Returns a human readable string for name of the given codec interface. - * - * \param[in] iface Interface pointer - * - */ - const char *vpx_codec_iface_name(vpx_codec_iface_t *iface); - - - /*!\brief Convert error number to printable string - * - * Returns a human readable string for the last error returned by the - * algorithm. The returned error will be one line and will not contain - * any newline characters. - * - * - * \param[in] err Error number. - * - */ - const char *vpx_codec_err_to_string(vpx_codec_err_t err); - - - /*!\brief Retrieve error synopsis for codec context - * - * Returns a human readable string for the last error returned by the - * algorithm. The returned error will be one line and will not contain - * any newline characters. - * - * - * \param[in] ctx Pointer to this instance's context. - * - */ - const char *vpx_codec_error(vpx_codec_ctx_t *ctx); - - - /*!\brief Retrieve detailed error information for codec context - * - * Returns a human readable string providing detailed information about - * the last error. - * - * \param[in] ctx Pointer to this instance's context. - * - * \retval NULL - * No detailed information is available. - */ - const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx); - - - /* REQUIRED FUNCTIONS - * - * The following functions are required to be implemented for all codecs. - * They represent the base case functionality expected of all codecs. - */ - - /*!\brief Destroy a codec instance - * - * Destroys a codec context, freeing any associated memory buffers. - * - * \param[in] ctx Pointer to this instance's context - * - * \retval #VPX_CODEC_OK - * The codec algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. - */ - vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx); - - - /*!\brief Get the capabilities of an algorithm. - * - * Retrieves the capabilities bitfield from the algorithm's interface. - * - * \param[in] iface Pointer to the algorithm interface - * - */ - vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface); - - - /*!\brief Control algorithm - * - * This function is used to exchange algorithm specific data with the codec - * instance. This can be used to implement features specific to a particular - * algorithm. - * - * This wrapper function dispatches the request to the helper function - * associated with the given ctrl_id. It tries to call this function - * transparently, but will return #VPX_CODEC_ERROR if the request could not - * be dispatched. - * - * Note that this function should not be used directly. Call the - * #vpx_codec_control wrapper macro instead. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] ctrl_id Algorithm specific control identifier - * - * \retval #VPX_CODEC_OK - * The control request was processed. - * \retval #VPX_CODEC_ERROR - * The control request was not processed. - * \retval #VPX_CODEC_INVALID_PARAM - * The data was not valid. - */ - vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, - int ctrl_id, - ...); -#if defined(VPX_DISABLE_CTRL_TYPECHECKS) && VPX_DISABLE_CTRL_TYPECHECKS -# define vpx_codec_control(ctx,id,data) vpx_codec_control_(ctx,id,data) -# define VPX_CTRL_USE_TYPE(id, typ) -# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) -# define VPX_CTRL_VOID(id, typ) - -#else - /*!\brief vpx_codec_control wrapper macro - * - * This macro allows for type safe conversions across the variadic parameter - * to vpx_codec_control_(). - * - * \internal - * It works by dispatching the call to the control function through a wrapper - * function named with the id parameter. - */ -# define vpx_codec_control(ctx,id,data) vpx_codec_control_##id(ctx,id,data)\ - /**<\hideinitializer*/ - - - /*!\brief vpx_codec_control type definition macro - * - * This macro allows for type safe conversions across the variadic parameter - * to vpx_codec_control_(). It defines the type of the argument for a given - * control identifier. - * - * \internal - * It defines a static function with - * the correctly typed arguments as a wrapper to the type-unsafe internal - * function. - */ -# define VPX_CTRL_USE_TYPE(id, typ) \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) UNUSED;\ - \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\ - return vpx_codec_control_(ctx, ctrl_id, data);\ - } /**<\hideinitializer*/ - - - /*!\brief vpx_codec_control deprecated type definition macro - * - * Like #VPX_CTRL_USE_TYPE, but indicates that the specified control is - * deprecated and should not be used. Consult the documentation for your - * codec for more information. - * - * \internal - * It defines a static function with the correctly typed arguments as a - * wrapper to the type-unsafe internal function. - */ -# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ - DECLSPEC_DEPRECATED static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) DEPRECATED UNUSED;\ - \ - DECLSPEC_DEPRECATED static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\ - return vpx_codec_control_(ctx, ctrl_id, data);\ - } /**<\hideinitializer*/ - - - /*!\brief vpx_codec_control void type definition macro - * - * This macro allows for type safe conversions across the variadic parameter - * to vpx_codec_control_(). It indicates that a given control identifier takes - * no argument. - * - * \internal - * It defines a static function without a data argument as a wrapper to the - * type-unsafe internal function. - */ -# define VPX_CTRL_VOID(id) \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t*, int) UNUSED;\ - \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id) {\ - return vpx_codec_control_(ctx, ctrl_id);\ - } /**<\hideinitializer*/ - - -#endif - - /*!@} - end defgroup codec*/ -#ifdef __cplusplus -} -#endif -#endif // VPX_VPX_CODEC_H_ - diff --git a/thirdparty/libvpx/vpx/vpx_decoder.h b/thirdparty/libvpx/vpx/vpx_decoder.h deleted file mode 100644 index 62fd919756..0000000000 --- a/thirdparty/libvpx/vpx/vpx_decoder.h +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_VPX_DECODER_H_ -#define VPX_VPX_DECODER_H_ - -/*!\defgroup decoder Decoder Algorithm Interface - * \ingroup codec - * This abstraction allows applications using this decoder to easily support - * multiple video formats with minimal code duplication. This section describes - * the interface common to all decoders. - * @{ - */ - -/*!\file - * \brief Describes the decoder algorithm interface to applications. - * - * This file describes the interface between an application and a - * video decoder algorithm. - * - */ -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_codec.h" -#include "./vpx_frame_buffer.h" - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_DECODER_ABI_VERSION (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ - - /*! \brief Decoder capabilities bitfield - * - * Each decoder advertises the capabilities it supports as part of its - * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces - * or functionality, and are not required to be supported by a decoder. - * - * The available flags are specified by VPX_CODEC_CAP_* defines. - */ -#define VPX_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */ -#define VPX_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */ -#define VPX_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */ -#define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000 /**< Can conceal errors due to - packet loss */ -#define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000 /**< Can receive encoded frames - one fragment at a time */ - - /*! \brief Initialization-time Feature Enabling - * - * Certain codec features must be known at initialization time, to allow for - * proper memory allocation. - * - * The available flags are specified by VPX_CODEC_USE_* defines. - */ -#define VPX_CODEC_CAP_FRAME_THREADING 0x200000 /**< Can support frame-based - multi-threading */ -#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000 /**< Can support external - frame buffers */ - -#define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */ -#define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded - frames */ -#define VPX_CODEC_USE_INPUT_FRAGMENTS 0x40000 /**< The input frame should be - passed to the decoder one - fragment at a time */ -#define VPX_CODEC_USE_FRAME_THREADING 0x80000 /**< Enable frame-based - multi-threading */ - - /*!\brief Stream properties - * - * This structure is used to query or set properties of the decoded - * stream. Algorithms may extend this structure with data specific - * to their bitstream by setting the sz member appropriately. - */ - typedef struct vpx_codec_stream_info { - unsigned int sz; /**< Size of this structure */ - unsigned int w; /**< Width (or 0 for unknown/default) */ - unsigned int h; /**< Height (or 0 for unknown/default) */ - unsigned int is_kf; /**< Current frame is a keyframe */ - } vpx_codec_stream_info_t; - - /* REQUIRED FUNCTIONS - * - * The following functions are required to be implemented for all decoders. - * They represent the base case functionality expected of all decoders. - */ - - - /*!\brief Initialization Configurations - * - * This structure is used to pass init time configuration options to the - * decoder. - */ - typedef struct vpx_codec_dec_cfg { - unsigned int threads; /**< Maximum number of threads to use, default 1 */ - unsigned int w; /**< Width */ - unsigned int h; /**< Height */ - } vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */ - - - /*!\brief Initialize a decoder instance - * - * Initializes a decoder context using the given interface. Applications - * should call the vpx_codec_dec_init convenience macro instead of this - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * - * If the library was configured with --disable-multithread, this call - * is not thread safe and should be guarded with a lock if being used - * in a multithreaded context. - * - * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the algorithm interface to use. - * \param[in] cfg Configuration to use, if known. May be NULL. - * \param[in] flags Bitfield of VPX_CODEC_USE_* flags - * \param[in] ver ABI version number. Must be set to - * VPX_DECODER_ABI_VERSION - * \retval #VPX_CODEC_OK - * The decoder algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. - */ - vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - const vpx_codec_dec_cfg_t *cfg, - vpx_codec_flags_t flags, - int ver); - - /*!\brief Convenience macro for vpx_codec_dec_init_ver() - * - * Ensures the ABI version parameter is properly set. - */ -#define vpx_codec_dec_init(ctx, iface, cfg, flags) \ - vpx_codec_dec_init_ver(ctx, iface, cfg, flags, VPX_DECODER_ABI_VERSION) - - - /*!\brief Parse stream info from a buffer - * - * Performs high level parsing of the bitstream. Construction of a decoder - * context is not necessary. Can be used to determine if the bitstream is - * of the proper format, and to extract information from the stream. - * - * \param[in] iface Pointer to the algorithm interface - * \param[in] data Pointer to a block of data to parse - * \param[in] data_sz Size of the data buffer - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ - vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, - const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si); - - - /*!\brief Return information about the current stream. - * - * Returns information about the stream that has been parsed during decoding. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ - vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, - vpx_codec_stream_info_t *si); - - - /*!\brief Decode data - * - * Processes a buffer of coded data. If the processing results in a new - * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be - * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode - * time stamp) order. Frames produced will always be in PTS (presentation - * time stamp) order. - * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled, - * data and data_sz can contain a fragment of the encoded frame. Fragment - * \#n must contain at least partition \#n, but can also contain subsequent - * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must - * be empty. When no more data is available, this function should be called - * with NULL as data and 0 as data_sz. The memory passed to this function - * must be available until the frame has been decoded. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] data Pointer to this block of new coded data. If - * NULL, a VPX_CODEC_CB_PUT_FRAME event is posted - * for the previously decoded frame. - * \param[in] data_sz Size of the coded data, in bytes. - * \param[in] user_priv Application specific data to associate with - * this frame. - * \param[in] deadline Soft deadline the decoder should attempt to meet, - * in us. Set to zero for unlimited. - * - * \return Returns #VPX_CODEC_OK if the coded data was processed completely - * and future pictures can be decoded without error. Otherwise, - * see the descriptions of the other error codes in ::vpx_codec_err_t - * for recoverability capabilities. - */ - vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline); - - - /*!\brief Decoded frames iterator - * - * Iterates over a list of the frames available for display. The iterator - * storage should be initialized to NULL to start the iteration. Iteration is - * complete when this function returns NULL. - * - * The list of available frames becomes valid upon completion of the - * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] iter Iterator storage, initialized to NULL - * - * \return Returns a pointer to an image, if one is ready for display. Frames - * produced will always be in PTS (presentation time stamp) order. - */ - vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter); - - - /*!\defgroup cap_put_frame Frame-Based Decoding Functions - * - * The following functions are required to be implemented for all decoders - * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling these functions - * for codecs that don't advertise this capability will result in an error - * code being returned, usually VPX_CODEC_ERROR - * @{ - */ - - /*!\brief put frame callback prototype - * - * This callback is invoked by the decoder to notify the application of - * the availability of decoded image data. - */ - typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv, - const vpx_image_t *img); - - - /*!\brief Register for notification of frame completion. - * - * Registers a given function to be called when a decoded frame is - * available. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb Pointer to the callback function - * \param[in] user_priv User's private data - * - * \retval #VPX_CODEC_OK - * Callback successfully registered. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * posting slice completion. - */ - vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_frame_cb_fn_t cb, - void *user_priv); - - - /*!@} - end defgroup cap_put_frame */ - - /*!\defgroup cap_put_slice Slice-Based Decoding Functions - * - * The following functions are required to be implemented for all decoders - * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these functions - * for codecs that don't advertise this capability will result in an error - * code being returned, usually VPX_CODEC_ERROR - * @{ - */ - - /*!\brief put slice callback prototype - * - * This callback is invoked by the decoder to notify the application of - * the availability of partially decoded image data. The - */ - typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv, - const vpx_image_t *img, - const vpx_image_rect_t *valid, - const vpx_image_rect_t *update); - - - /*!\brief Register for notification of slice completion. - * - * Registers a given function to be called when a decoded slice is - * available. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb Pointer to the callback function - * \param[in] user_priv User's private data - * - * \retval #VPX_CODEC_OK - * Callback successfully registered. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * posting slice completion. - */ - vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_slice_cb_fn_t cb, - void *user_priv); - - - /*!@} - end defgroup cap_put_slice*/ - - /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions - * - * The following section is required to be implemented for all decoders - * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability. - * Calling this function for codecs that don't advertise this capability - * will result in an error code being returned, usually VPX_CODEC_ERROR. - * - * \note - * Currently this only works with VP9. - * @{ - */ - - /*!\brief Pass in external frame buffers for the decoder to use. - * - * Registers functions to be called when libvpx needs a frame buffer - * to decode the current frame and a function to be called when libvpx does - * not internally reference the frame buffer. This set function must - * be called before the first call to decode or libvpx will assume the - * default behavior of allocating frame buffers internally. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb_get Pointer to the get callback function - * \param[in] cb_release Pointer to the release callback function - * \param[in] cb_priv Callback's private data - * - * \retval #VPX_CODEC_OK - * External frame buffers will be used by libvpx. - * \retval #VPX_CODEC_INVALID_PARAM - * One or more of the callbacks were NULL. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * using external frame buffers. - * - * \note - * When decoding VP9, the application may be required to pass in at least - * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame - * buffers. - */ - vpx_codec_err_t vpx_codec_set_frame_buffer_functions( - vpx_codec_ctx_t *ctx, - vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); - - /*!@} - end defgroup cap_external_frame_buffer */ - - /*!@} - end defgroup decoder*/ -#ifdef __cplusplus -} -#endif -#endif // VPX_VPX_DECODER_H_ - diff --git a/thirdparty/libvpx/vpx/vpx_encoder.h b/thirdparty/libvpx/vpx/vpx_encoder.h deleted file mode 100644 index 955e873519..0000000000 --- a/thirdparty/libvpx/vpx/vpx_encoder.h +++ /dev/null @@ -1,1043 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_VPX_ENCODER_H_ -#define VPX_VPX_ENCODER_H_ - -/*!\defgroup encoder Encoder Algorithm Interface - * \ingroup codec - * This abstraction allows applications using this encoder to easily support - * multiple video formats with minimal code duplication. This section describes - * the interface common to all encoders. - * @{ - */ - -/*!\file - * \brief Describes the encoder algorithm interface to applications. - * - * This file describes the interface between an application and a - * video encoder algorithm. - * - */ -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_codec.h" - - /*! Temporal Scalability: Maximum length of the sequence defining frame - * layer membership - */ -#define VPX_TS_MAX_PERIODICITY 16 - - /*! Temporal Scalability: Maximum number of coding layers */ -#define VPX_TS_MAX_LAYERS 5 - - /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ -#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY - -/*! Temporal+Spatial Scalability: Maximum number of coding layers */ -#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. - -/*!\deprecated Use #VPX_MAX_LAYERS instead. */ -#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. - -/*! Spatial Scalability: Maximum number of coding layers */ -#define VPX_SS_MAX_LAYERS 5 - -/*! Spatial Scalability: Default number of coding layers */ -#define VPX_SS_DEFAULT_LAYERS 1 - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ - - - /*! \brief Encoder capabilities bitfield - * - * Each encoder advertises the capabilities it supports as part of its - * ::vpx_codec_iface_t interface structure. Capabilities are extra - * interfaces or functionality, and are not required to be supported - * by an encoder. - * - * The available flags are specified by VPX_CODEC_CAP_* defines. - */ -#define VPX_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */ - - /*! Can output one partition at a time. Each partition is returned in its - * own VPX_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for - * every partition but the last. In this mode all frames are always - * returned partition by partition. - */ -#define VPX_CODEC_CAP_OUTPUT_PARTITION 0x20000 - -/*! Can support input images at greater than 8 bitdepth. - */ -#define VPX_CODEC_CAP_HIGHBITDEPTH 0x40000 - - /*! \brief Initialization-time Feature Enabling - * - * Certain codec features must be known at initialization time, to allow - * for proper memory allocation. - * - * The available flags are specified by VPX_CODEC_USE_* defines. - */ -#define VPX_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */ -#define VPX_CODEC_USE_OUTPUT_PARTITION 0x20000 /**< Make the encoder output one - partition at a time. */ -#define VPX_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */ - - - /*!\brief Generic fixed size buffer structure - * - * This structure is able to hold a reference to any fixed size buffer. - */ - typedef struct vpx_fixed_buf { - void *buf; /**< Pointer to the data */ - size_t sz; /**< Length of the buffer, in chars */ - } vpx_fixed_buf_t; /**< alias for struct vpx_fixed_buf */ - - - /*!\brief Time Stamp Type - * - * An integer, which when multiplied by the stream's time base, provides - * the absolute time of a sample. - */ - typedef int64_t vpx_codec_pts_t; - - - /*!\brief Compressed Frame Flags - * - * This type represents a bitfield containing information about a compressed - * frame that may be useful to an application. The most significant 16 bits - * can be used by an algorithm to provide additional detail, for example to - * support frame types that are codec specific (MPEG-1 D-frames for example) - */ - typedef uint32_t vpx_codec_frame_flags_t; -#define VPX_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */ -#define VPX_FRAME_IS_DROPPABLE 0x2 /**< frame can be dropped without affecting - the stream (no future frame depends on - this one) */ -#define VPX_FRAME_IS_INVISIBLE 0x4 /**< frame should be decoded but will not - be shown */ -#define VPX_FRAME_IS_FRAGMENT 0x8 /**< this is a fragment of the encoded - frame */ - - /*!\brief Error Resilient flags - * - * These flags define which error resilient features to enable in the - * encoder. The flags are specified through the - * vpx_codec_enc_cfg::g_error_resilient variable. - */ - typedef uint32_t vpx_codec_er_flags_t; -#define VPX_ERROR_RESILIENT_DEFAULT 0x1 /**< Improve resiliency against - losses of whole frames */ -#define VPX_ERROR_RESILIENT_PARTITIONS 0x2 /**< The frame partitions are - independently decodable by the - bool decoder, meaning that - partitions can be decoded even - though earlier partitions have - been lost. Note that intra - prediction is still done over - the partition boundary. */ - - /*!\brief Encoder output packet variants - * - * This enumeration lists the different kinds of data packets that can be - * returned by calls to vpx_codec_get_cx_data(). Algorithms \ref MAY - * extend this list to provide additional functionality. - */ - enum vpx_codec_cx_pkt_kind { - VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ - VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ - VPX_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ - VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ - // Spatial SVC is still experimental and may be removed before the next ABI - // bump. -#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) - VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ - VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/ -#endif - VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ - }; - - - /*!\brief Encoder output packet - * - * This structure contains the different kinds of output data the encoder - * may produce while compressing a frame. - */ - typedef struct vpx_codec_cx_pkt { - enum vpx_codec_cx_pkt_kind kind; /**< packet variant */ - union { - struct { - void *buf; /**< compressed data buffer */ - size_t sz; /**< length of compressed data */ - vpx_codec_pts_t pts; /**< time stamp to show frame - (in timebase units) */ - unsigned long duration; /**< duration to show frame - (in timebase units) */ - vpx_codec_frame_flags_t flags; /**< flags for this frame */ - int partition_id; /**< the partition id - defines the decoding order - of the partitions. Only - applicable when "output partition" - mode is enabled. First partition - has id 0.*/ - - } frame; /**< data for compressed frame packet */ - vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */ - vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ - struct vpx_psnr_pkt { - unsigned int samples[4]; /**< Number of samples, total/y/u/v */ - uint64_t sse[4]; /**< sum squared error, total/y/u/v */ - double psnr[4]; /**< PSNR, total/y/u/v */ - } psnr; /**< data for PSNR packet */ - vpx_fixed_buf_t raw; /**< data for arbitrary packets */ - // Spatial SVC is still experimental and may be removed before the next - // ABI bump. -#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) - size_t layer_sizes[VPX_SS_MAX_LAYERS]; - struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS]; -#endif - - /* This packet size is fixed to allow codecs to extend this - * interface without having to manage storage for raw packets, - * i.e., if it's smaller than 128 bytes, you can store in the - * packet list directly. - */ - char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */ - } data; /**< packet data */ - } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */ - - - /*!\brief Encoder return output buffer callback - * - * This callback function, when registered, returns with packets when each - * spatial layer is encoded. - */ - // putting the definitions here for now. (agrange: find if there - // is a better place for this) - typedef void (* vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt, - void *user_data); - - /*!\brief Callback function pointer / user data pair storage */ - typedef struct vpx_codec_enc_output_cx_cb_pair { - vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt; /**< Callback function */ - void *user_priv; /**< Pointer to private data */ - } vpx_codec_priv_output_cx_pkt_cb_pair_t; - - /*!\brief Rational Number - * - * This structure holds a fractional value. - */ - typedef struct vpx_rational { - int num; /**< fraction numerator */ - int den; /**< fraction denominator */ - } vpx_rational_t; /**< alias for struct vpx_rational */ - - - /*!\brief Multi-pass Encoding Pass */ - enum vpx_enc_pass { - VPX_RC_ONE_PASS, /**< Single pass mode */ - VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */ - VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */ - }; - - - /*!\brief Rate control mode */ - enum vpx_rc_mode { - VPX_VBR, /**< Variable Bit Rate (VBR) mode */ - VPX_CBR, /**< Constant Bit Rate (CBR) mode */ - VPX_CQ, /**< Constrained Quality (CQ) mode */ - VPX_Q, /**< Constant Quality (Q) mode */ - }; - - - /*!\brief Keyframe placement mode. - * - * This enumeration determines whether keyframes are placed automatically by - * the encoder or whether this behavior is disabled. Older releases of this - * SDK were implemented such that VPX_KF_FIXED meant keyframes were disabled. - * This name is confusing for this behavior, so the new symbols to be used - * are VPX_KF_AUTO and VPX_KF_DISABLED. - */ - enum vpx_kf_mode { - VPX_KF_FIXED, /**< deprecated, implies VPX_KF_DISABLED */ - VPX_KF_AUTO, /**< Encoder determines optimal placement automatically */ - VPX_KF_DISABLED = 0 /**< Encoder does not place keyframes. */ - }; - - - /*!\brief Encoded Frame Flags - * - * This type indicates a bitfield to be passed to vpx_codec_encode(), defining - * per-frame boolean values. By convention, bits common to all codecs will be - * named VPX_EFLAG_*, and bits specific to an algorithm will be named - * /algo/_eflag_*. The lower order 16 bits are reserved for common use. - */ - typedef long vpx_enc_frame_flags_t; -#define VPX_EFLAG_FORCE_KF (1<<0) /**< Force this frame to be a keyframe */ - - - /*!\brief Encoder configuration structure - * - * This structure contains the encoder settings that have common representations - * across all codecs. This doesn't imply that all codecs support all features, - * however. - */ - typedef struct vpx_codec_enc_cfg { - /* - * generic settings (g) - */ - - /*!\brief Algorithm specific "usage" value - * - * Algorithms may define multiple values for usage, which may convey the - * intent of how the application intends to use the stream. If this value - * is non-zero, consult the documentation for the codec to determine its - * meaning. - */ - unsigned int g_usage; - - - /*!\brief Maximum number of threads to use - * - * For multi-threaded implementations, use no more than this number of - * threads. The codec may use fewer threads than allowed. The value - * 0 is equivalent to the value 1. - */ - unsigned int g_threads; - - - /*!\brief Bitstream profile to use - * - * Some codecs support a notion of multiple bitstream profiles. Typically - * this maps to a set of features that are turned on or off. Often the - * profile to use is determined by the features of the intended decoder. - * Consult the documentation for the codec to determine the valid values - * for this parameter, or set to zero for a sane default. - */ - unsigned int g_profile; /**< profile of bitstream to use */ - - - - /*!\brief Width of the frame - * - * This value identifies the presentation resolution of the frame, - * in pixels. Note that the frames passed as input to the encoder must - * have this resolution. Frames will be presented by the decoder in this - * resolution, independent of any spatial resampling the encoder may do. - */ - unsigned int g_w; - - - /*!\brief Height of the frame - * - * This value identifies the presentation resolution of the frame, - * in pixels. Note that the frames passed as input to the encoder must - * have this resolution. Frames will be presented by the decoder in this - * resolution, independent of any spatial resampling the encoder may do. - */ - unsigned int g_h; - - /*!\brief Bit-depth of the codec - * - * This value identifies the bit_depth of the codec, - * Only certain bit-depths are supported as identified in the - * vpx_bit_depth_t enum. - */ - vpx_bit_depth_t g_bit_depth; - - /*!\brief Bit-depth of the input frames - * - * This value identifies the bit_depth of the input frames in bits. - * Note that the frames passed as input to the encoder must have - * this bit-depth. - */ - unsigned int g_input_bit_depth; - - /*!\brief Stream timebase units - * - * Indicates the smallest interval of time, in seconds, used by the stream. - * For fixed frame rate material, or variable frame rate material where - * frames are timed at a multiple of a given clock (ex: video capture), - * the \ref RECOMMENDED method is to set the timebase to the reciprocal - * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the - * pts to correspond to the frame number, which can be handy. For - * re-encoding video from containers with absolute time timestamps, the - * \ref RECOMMENDED method is to set the timebase to that of the parent - * container or multimedia framework (ex: 1/1000 for ms, as in FLV). - */ - struct vpx_rational g_timebase; - - - /*!\brief Enable error resilient modes. - * - * The error resilient bitfield indicates to the encoder which features - * it should enable to take measures for streaming over lossy or noisy - * links. - */ - vpx_codec_er_flags_t g_error_resilient; - - - /*!\brief Multi-pass Encoding Mode - * - * This value should be set to the current phase for multi-pass encoding. - * For single pass, set to #VPX_RC_ONE_PASS. - */ - enum vpx_enc_pass g_pass; - - - /*!\brief Allow lagged encoding - * - * If set, this value allows the encoder to consume a number of input - * frames before producing output frames. This allows the encoder to - * base decisions for the current frame on future frames. This does - * increase the latency of the encoding pipeline, so it is not appropriate - * in all situations (ex: realtime encoding). - * - * Note that this is a maximum value -- the encoder may produce frames - * sooner than the given limit. Set this value to 0 to disable this - * feature. - */ - unsigned int g_lag_in_frames; - - - /* - * rate control settings (rc) - */ - - /*!\brief Temporal resampling configuration, if supported by the codec. - * - * Temporal resampling allows the codec to "drop" frames as a strategy to - * meet its target data rate. This can cause temporal discontinuities in - * the encoded video, which may appear as stuttering during playback. This - * trade-off is often acceptable, but for many applications is not. It can - * be disabled in these cases. - * - * Note that not all codecs support this feature. All vpx VPx codecs do. - * For other codecs, consult the documentation for that algorithm. - * - * This threshold is described as a percentage of the target data buffer. - * When the data buffer falls below this percentage of fullness, a - * dropped frame is indicated. Set the threshold to zero (0) to disable - * this feature. - */ - unsigned int rc_dropframe_thresh; - - - /*!\brief Enable/disable spatial resampling, if supported by the codec. - * - * Spatial resampling allows the codec to compress a lower resolution - * version of the frame, which is then upscaled by the encoder to the - * correct presentation resolution. This increases visual quality at - * low data rates, at the expense of CPU time on the encoder/decoder. - */ - unsigned int rc_resize_allowed; - - /*!\brief Internal coded frame width. - * - * If spatial resampling is enabled this specifies the width of the - * encoded frame. - */ - unsigned int rc_scaled_width; - - /*!\brief Internal coded frame height. - * - * If spatial resampling is enabled this specifies the height of the - * encoded frame. - */ - unsigned int rc_scaled_height; - - /*!\brief Spatial resampling up watermark. - * - * This threshold is described as a percentage of the target data buffer. - * When the data buffer rises above this percentage of fullness, the - * encoder will step up to a higher resolution version of the frame. - */ - unsigned int rc_resize_up_thresh; - - - /*!\brief Spatial resampling down watermark. - * - * This threshold is described as a percentage of the target data buffer. - * When the data buffer falls below this percentage of fullness, the - * encoder will step down to a lower resolution version of the frame. - */ - unsigned int rc_resize_down_thresh; - - - /*!\brief Rate control algorithm to use. - * - * Indicates whether the end usage of this stream is to be streamed over - * a bandwidth constrained link, indicating that Constant Bit Rate (CBR) - * mode should be used, or whether it will be played back on a high - * bandwidth link, as from a local disk, where higher variations in - * bitrate are acceptable. - */ - enum vpx_rc_mode rc_end_usage; - - - /*!\brief Two-pass stats buffer. - * - * A buffer containing all of the stats packets produced in the first - * pass, concatenated. - */ - vpx_fixed_buf_t rc_twopass_stats_in; - - /*!\brief first pass mb stats buffer. - * - * A buffer containing all of the first pass mb stats packets produced - * in the first pass, concatenated. - */ - vpx_fixed_buf_t rc_firstpass_mb_stats_in; - - /*!\brief Target data rate - * - * Target bandwidth to use for this stream, in kilobits per second. - */ - unsigned int rc_target_bitrate; - - - /* - * quantizer settings - */ - - - /*!\brief Minimum (Best Quality) Quantizer - * - * The quantizer is the most direct control over the quality of the - * encoded image. The range of valid values for the quantizer is codec - * specific. Consult the documentation for the codec to determine the - * values to use. To determine the range programmatically, call - * vpx_codec_enc_config_default() with a usage value of 0. - */ - unsigned int rc_min_quantizer; - - - /*!\brief Maximum (Worst Quality) Quantizer - * - * The quantizer is the most direct control over the quality of the - * encoded image. The range of valid values for the quantizer is codec - * specific. Consult the documentation for the codec to determine the - * values to use. To determine the range programmatically, call - * vpx_codec_enc_config_default() with a usage value of 0. - */ - unsigned int rc_max_quantizer; - - - /* - * bitrate tolerance - */ - - - /*!\brief Rate control adaptation undershoot control - * - * This value, expressed as a percentage of the target bitrate, - * controls the maximum allowed adaptation speed of the codec. - * This factor controls the maximum amount of bits that can - * be subtracted from the target bitrate in order to compensate - * for prior overshoot. - * - * Valid values in the range 0-1000. - */ - unsigned int rc_undershoot_pct; - - - /*!\brief Rate control adaptation overshoot control - * - * This value, expressed as a percentage of the target bitrate, - * controls the maximum allowed adaptation speed of the codec. - * This factor controls the maximum amount of bits that can - * be added to the target bitrate in order to compensate for - * prior undershoot. - * - * Valid values in the range 0-1000. - */ - unsigned int rc_overshoot_pct; - - - /* - * decoder buffer model parameters - */ - - - /*!\brief Decoder Buffer Size - * - * This value indicates the amount of data that may be buffered by the - * decoding application. Note that this value is expressed in units of - * time (milliseconds). For example, a value of 5000 indicates that the - * client will buffer (at least) 5000ms worth of encoded data. Use the - * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if - * necessary. - */ - unsigned int rc_buf_sz; - - - /*!\brief Decoder Buffer Initial Size - * - * This value indicates the amount of data that will be buffered by the - * decoding application prior to beginning playback. This value is - * expressed in units of time (milliseconds). Use the target bitrate - * (#rc_target_bitrate) to convert to bits/bytes, if necessary. - */ - unsigned int rc_buf_initial_sz; - - - /*!\brief Decoder Buffer Optimal Size - * - * This value indicates the amount of data that the encoder should try - * to maintain in the decoder's buffer. This value is expressed in units - * of time (milliseconds). Use the target bitrate (#rc_target_bitrate) - * to convert to bits/bytes, if necessary. - */ - unsigned int rc_buf_optimal_sz; - - - /* - * 2 pass rate control parameters - */ - - - /*!\brief Two-pass mode CBR/VBR bias - * - * Bias, expressed on a scale of 0 to 100, for determining target size - * for the current frame. The value 0 indicates the optimal CBR mode - * value should be used. The value 100 indicates the optimal VBR mode - * value should be used. Values in between indicate which way the - * encoder should "lean." - */ - unsigned int rc_2pass_vbr_bias_pct; /**< RC mode bias between CBR and VBR(0-100: 0->CBR, 100->VBR) */ - - - /*!\brief Two-pass mode per-GOP minimum bitrate - * - * This value, expressed as a percentage of the target bitrate, indicates - * the minimum bitrate to be used for a single GOP (aka "section") - */ - unsigned int rc_2pass_vbr_minsection_pct; - - - /*!\brief Two-pass mode per-GOP maximum bitrate - * - * This value, expressed as a percentage of the target bitrate, indicates - * the maximum bitrate to be used for a single GOP (aka "section") - */ - unsigned int rc_2pass_vbr_maxsection_pct; - - - /* - * keyframing settings (kf) - */ - - /*!\brief Keyframe placement mode - * - * This value indicates whether the encoder should place keyframes at a - * fixed interval, or determine the optimal placement automatically - * (as governed by the #kf_min_dist and #kf_max_dist parameters) - */ - enum vpx_kf_mode kf_mode; - - - /*!\brief Keyframe minimum interval - * - * This value, expressed as a number of frames, prevents the encoder from - * placing a keyframe nearer than kf_min_dist to the previous keyframe. At - * least kf_min_dist frames non-keyframes will be coded before the next - * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval. - */ - unsigned int kf_min_dist; - - - /*!\brief Keyframe maximum interval - * - * This value, expressed as a number of frames, forces the encoder to code - * a keyframe if one has not been coded in the last kf_max_dist frames. - * A value of 0 implies all frames will be keyframes. Set kf_min_dist - * equal to kf_max_dist for a fixed interval. - */ - unsigned int kf_max_dist; - - /* - * Spatial scalability settings (ss) - */ - - /*!\brief Number of spatial coding layers. - * - * This value specifies the number of spatial coding layers to be used. - */ - unsigned int ss_number_layers; - - /*!\brief Enable auto alt reference flags for each spatial layer. - * - * These values specify if auto alt reference frame is enabled for each - * spatial layer. - */ - int ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; - - /*!\brief Target bitrate for each spatial layer. - * - * These values specify the target coding bitrate to be used for each - * spatial layer. - */ - unsigned int ss_target_bitrate[VPX_SS_MAX_LAYERS]; - - /*!\brief Number of temporal coding layers. - * - * This value specifies the number of temporal layers to be used. - */ - unsigned int ts_number_layers; - - /*!\brief Target bitrate for each temporal layer. - * - * These values specify the target coding bitrate to be used for each - * temporal layer. - */ - unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS]; - - /*!\brief Frame rate decimation factor for each temporal layer. - * - * These values specify the frame rate decimation factors to apply - * to each temporal layer. - */ - unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS]; - - /*!\brief Length of the sequence defining frame temporal layer membership. - * - * This value specifies the length of the sequence that defines the - * membership of frames to temporal layers. For example, if the - * ts_periodicity = 8, then the frames are assigned to coding layers with a - * repeated sequence of length 8. - */ - unsigned int ts_periodicity; - - /*!\brief Template defining the membership of frames to temporal layers. - * - * This array defines the membership of frames to temporal coding layers. - * For a 2-layer encoding that assigns even numbered frames to one temporal - * layer (0) and odd numbered frames to a second temporal layer (1) with - * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). - */ - unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; - - /*!\brief Target bitrate for each spatial/temporal layer. - * - * These values specify the target coding bitrate to be used for each - * spatial/temporal layer. - * - */ - unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; - - /*!\brief Temporal layering mode indicating which temporal layering scheme to use. - * - * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the - * temporal layering mode to use. - * - */ - int temporal_layering_mode; - } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ - - /*!\brief vp9 svc extra configure parameters - * - * This defines max/min quantizers and scale factors for each layer - * - */ - typedef struct vpx_svc_parameters { - int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ - int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ - int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ - int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ - int temporal_layering_mode; /**< Temporal layering mode */ - } vpx_svc_extra_cfg_t; - - - /*!\brief Initialize an encoder instance - * - * Initializes a encoder context using the given interface. Applications - * should call the vpx_codec_enc_init convenience macro instead of this - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * - * If the library was configured with --disable-multithread, this call - * is not thread safe and should be guarded with a lock if being used - * in a multithreaded context. - * - * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the algorithm interface to use. - * \param[in] cfg Configuration to use, if known. May be NULL. - * \param[in] flags Bitfield of VPX_CODEC_USE_* flags - * \param[in] ver ABI version number. Must be set to - * VPX_ENCODER_ABI_VERSION - * \retval #VPX_CODEC_OK - * The decoder algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. - */ - vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - const vpx_codec_enc_cfg_t *cfg, - vpx_codec_flags_t flags, - int ver); - - - /*!\brief Convenience macro for vpx_codec_enc_init_ver() - * - * Ensures the ABI version parameter is properly set. - */ -#define vpx_codec_enc_init(ctx, iface, cfg, flags) \ - vpx_codec_enc_init_ver(ctx, iface, cfg, flags, VPX_ENCODER_ABI_VERSION) - - - /*!\brief Initialize multi-encoder instance - * - * Initializes multi-encoder context using the given interface. - * Applications should call the vpx_codec_enc_init_multi convenience macro - * instead of this function directly, to ensure that the ABI version number - * parameter is properly initialized. - * - * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the algorithm interface to use. - * \param[in] cfg Configuration to use, if known. May be NULL. - * \param[in] num_enc Total number of encoders. - * \param[in] flags Bitfield of VPX_CODEC_USE_* flags - * \param[in] dsf Pointer to down-sampling factors. - * \param[in] ver ABI version number. Must be set to - * VPX_ENCODER_ABI_VERSION - * \retval #VPX_CODEC_OK - * The decoder algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. - */ - vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - vpx_codec_enc_cfg_t *cfg, - int num_enc, - vpx_codec_flags_t flags, - vpx_rational_t *dsf, - int ver); - - - /*!\brief Convenience macro for vpx_codec_enc_init_multi_ver() - * - * Ensures the ABI version parameter is properly set. - */ -#define vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \ - vpx_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \ - VPX_ENCODER_ABI_VERSION) - - - /*!\brief Get a default configuration - * - * Initializes a encoder configuration structure with default values. Supports - * the notion of "usages" so that an algorithm may offer different default - * settings depending on the user's intended goal. This function \ref SHOULD - * be called by all applications to initialize the configuration structure - * before specializing the configuration with application specific values. - * - * \param[in] iface Pointer to the algorithm interface to use. - * \param[out] cfg Configuration buffer to populate. - * \param[in] reserved Must set to 0 for VP8 and VP9. - * - * \retval #VPX_CODEC_OK - * The configuration was populated. - * \retval #VPX_CODEC_INCAPABLE - * Interface is not an encoder interface. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, or the usage value was not recognized. - */ - vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, - vpx_codec_enc_cfg_t *cfg, - unsigned int reserved); - - - /*!\brief Set or change configuration - * - * Reconfigures an encoder instance according to the given configuration. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cfg Configuration buffer to use - * - * \retval #VPX_CODEC_OK - * The configuration was populated. - * \retval #VPX_CODEC_INCAPABLE - * Interface is not an encoder interface. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, or the usage value was not recognized. - */ - vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx, - const vpx_codec_enc_cfg_t *cfg); - - - /*!\brief Get global stream headers - * - * Retrieves a stream level global header packet, if supported by the codec. - * - * \param[in] ctx Pointer to this instance's context - * - * \retval NULL - * Encoder does not support global header - * \retval Non-NULL - * Pointer to buffer containing global header packet - */ - vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx); - - -#define VPX_DL_REALTIME (1) /**< deadline parameter analogous to - * VPx REALTIME mode. */ -#define VPX_DL_GOOD_QUALITY (1000000) /**< deadline parameter analogous to - * VPx GOOD QUALITY mode. */ -#define VPX_DL_BEST_QUALITY (0) /**< deadline parameter analogous to - * VPx BEST QUALITY mode. */ - /*!\brief Encode a frame - * - * Encodes a video frame at the given "presentation time." The presentation - * time stamp (PTS) \ref MUST be strictly increasing. - * - * The encoder supports the notion of a soft real-time deadline. Given a - * non-zero value to the deadline parameter, the encoder will make a "best - * effort" guarantee to return before the given time slice expires. It is - * implicit that limiting the available time to encode will degrade the - * output quality. The encoder can be given an unlimited time to produce the - * best possible frame by specifying a deadline of '0'. This deadline - * supercedes the VPx notion of "best quality, good quality, realtime". - * Applications that wish to map these former settings to the new deadline - * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY, - * and #VPX_DL_BEST_QUALITY. - * - * When the last frame has been passed to the encoder, this function should - * continue to be called, with the img parameter set to NULL. This will - * signal the end-of-stream condition to the encoder and allow it to encode - * any held buffers. Encoding is complete when vpx_codec_encode() is called - * and vpx_codec_get_cx_data() returns no data. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] img Image data to encode, NULL to flush. - * \param[in] pts Presentation time stamp, in timebase units. - * \param[in] duration Duration to show frame, in timebase units. - * \param[in] flags Flags to use for encoding this frame. - * \param[in] deadline Time to spend encoding, in microseconds. (0=infinite) - * - * \retval #VPX_CODEC_OK - * The configuration was populated. - * \retval #VPX_CODEC_INCAPABLE - * Interface is not an encoder interface. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, the image format is unsupported, etc. - */ - vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline); - - /*!\brief Set compressed data output buffer - * - * Sets the buffer that the codec should output the compressed data - * into. This call effectively sets the buffer pointer returned in the - * next VPX_CODEC_CX_FRAME_PKT packet. Subsequent packets will be - * appended into this buffer. The buffer is preserved across frames, - * so applications must periodically call this function after flushing - * the accumulated compressed data to disk or to the network to reset - * the pointer to the buffer's head. - * - * `pad_before` bytes will be skipped before writing the compressed - * data, and `pad_after` bytes will be appended to the packet. The size - * of the packet will be the sum of the size of the actual compressed - * data, pad_before, and pad_after. The padding bytes will be preserved - * (not overwritten). - * - * Note that calling this function does not guarantee that the returned - * compressed data will be placed into the specified buffer. In the - * event that the encoded data will not fit into the buffer provided, - * the returned packet \ref MAY point to an internal buffer, as it would - * if this call were never used. In this event, the output packet will - * NOT have any padding, and the application must free space and copy it - * to the proper place. This is of particular note in configurations - * that may output multiple packets for a single encoded frame (e.g., lagged - * encoding) or if the application does not reset the buffer periodically. - * - * Applications may restore the default behavior of the codec providing - * the compressed data buffer by calling this function with a NULL - * buffer. - * - * Applications \ref MUSTNOT call this function during iteration of - * vpx_codec_get_cx_data(). - * - * \param[in] ctx Pointer to this instance's context - * \param[in] buf Buffer to store compressed data into - * \param[in] pad_before Bytes to skip before writing compressed data - * \param[in] pad_after Bytes to skip after writing compressed data - * - * \retval #VPX_CODEC_OK - * The buffer was set successfully. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, the image format is unsupported, etc. - */ - vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx, - const vpx_fixed_buf_t *buf, - unsigned int pad_before, - unsigned int pad_after); - - - /*!\brief Encoded data iterator - * - * Iterates over a list of data packets to be passed from the encoder to the - * application. The different kinds of packets available are enumerated in - * #vpx_codec_cx_pkt_kind. - * - * #VPX_CODEC_CX_FRAME_PKT packets should be passed to the application's - * muxer. Multiple compressed frames may be in the list. - * #VPX_CODEC_STATS_PKT packets should be appended to a global buffer. - * - * The application \ref MUST silently ignore any packet kinds that it does - * not recognize or support. - * - * The data buffers returned from this function are only guaranteed to be - * valid until the application makes another call to any vpx_codec_* function. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] iter Iterator storage, initialized to NULL - * - * \return Returns a pointer to an output data packet (compressed frame data, - * two-pass statistics, etc.) or NULL to signal end-of-list. - * - */ - const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter); - - - /*!\brief Get Preview Frame - * - * Returns an image that can be used as a preview. Shows the image as it would - * exist at the decompressor. The application \ref MUST NOT write into this - * image buffer. - * - * \param[in] ctx Pointer to this instance's context - * - * \return Returns a pointer to a preview image, or NULL if no image is - * available. - * - */ - const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx); - - - /*!@} - end defgroup encoder*/ -#ifdef __cplusplus -} -#endif -#endif // VPX_VPX_ENCODER_H_ - diff --git a/thirdparty/libvpx/vpx/vpx_frame_buffer.h b/thirdparty/libvpx/vpx/vpx_frame_buffer.h deleted file mode 100644 index 9036459af0..0000000000 --- a/thirdparty/libvpx/vpx/vpx_frame_buffer.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_VPX_FRAME_BUFFER_H_ -#define VPX_VPX_FRAME_BUFFER_H_ - -/*!\file - * \brief Describes the decoder external frame buffer interface. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_integer.h" - -/*!\brief The maximum number of work buffers used by libvpx. - * Support maximum 4 threads to decode video in parallel. - * Each thread will use one work buffer. - * TODO(hkuang): Add support to set number of worker threads dynamically. - */ -#define VPX_MAXIMUM_WORK_BUFFERS 8 - -/*!\brief The maximum number of reference buffers that a VP9 encoder may use. - */ -#define VP9_MAXIMUM_REF_BUFFERS 8 - -/*!\brief External frame buffer - * - * This structure holds allocated frame buffers used by the decoder. - */ -typedef struct vpx_codec_frame_buffer { - uint8_t *data; /**< Pointer to the data buffer */ - size_t size; /**< Size of data in bytes */ - void *priv; /**< Frame's private data */ -} vpx_codec_frame_buffer_t; - -/*!\brief get frame buffer callback prototype - * - * This callback is invoked by the decoder to retrieve data for the frame - * buffer in order for the decode call to complete. The callback must - * allocate at least min_size in bytes and assign it to fb->data. The callback - * must zero out all the data allocated. Then the callback must set fb->size - * to the allocated size. The application does not need to align the allocated - * data. The callback is triggered when the decoder needs a frame buffer to - * decode a compressed image into. This function may be called more than once - * for every call to vpx_codec_decode. The application may set fb->priv to - * some data which will be passed back in the ximage and the release function - * call. |fb| is guaranteed to not be NULL. On success the callback must - * return 0. Any failure the callback must return a value less than 0. - * - * \param[in] priv Callback's private data - * \param[in] new_size Size in bytes needed by the buffer - * \param[in,out] fb Pointer to vpx_codec_frame_buffer_t - */ -typedef int (*vpx_get_frame_buffer_cb_fn_t)( - void *priv, size_t min_size, vpx_codec_frame_buffer_t *fb); - -/*!\brief release frame buffer callback prototype - * - * This callback is invoked by the decoder when the frame buffer is not - * referenced by any other buffers. |fb| is guaranteed to not be NULL. On - * success the callback must return 0. Any failure the callback must return - * a value less than 0. - * - * \param[in] priv Callback's private data - * \param[in] fb Pointer to vpx_codec_frame_buffer_t - */ -typedef int (*vpx_release_frame_buffer_cb_fn_t)( - void *priv, vpx_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VPX_FRAME_BUFFER_H_ diff --git a/thirdparty/libvpx/vpx/vpx_image.h b/thirdparty/libvpx/vpx/vpx_image.h deleted file mode 100644 index 7958c69806..0000000000 --- a/thirdparty/libvpx/vpx/vpx_image.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Describes the vpx image descriptor and associated operations - * - */ -#ifndef VPX_VPX_IMAGE_H_ -#define VPX_VPX_IMAGE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/ - - -#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ -#define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */ -#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */ -#define VPX_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */ - - /*!\brief List of supported image formats */ - typedef enum vpx_img_fmt { - VPX_IMG_FMT_NONE, - VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */ - VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */ - VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */ - VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */ - VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */ - VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */ - VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */ - VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */ - VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */ - VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */ - VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */ - VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */ - VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */ - VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ - VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, - VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */ - VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, - VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, - VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, - VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, - VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 6, - VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, - VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, - VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH, - VPX_IMG_FMT_I44016 = VPX_IMG_FMT_I440 | VPX_IMG_FMT_HIGHBITDEPTH - } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ - - /*!\brief List of supported color spaces */ - typedef enum vpx_color_space { - VPX_CS_UNKNOWN = 0, /**< Unknown */ - VPX_CS_BT_601 = 1, /**< BT.601 */ - VPX_CS_BT_709 = 2, /**< BT.709 */ - VPX_CS_SMPTE_170 = 3, /**< SMPTE.170 */ - VPX_CS_SMPTE_240 = 4, /**< SMPTE.240 */ - VPX_CS_BT_2020 = 5, /**< BT.2020 */ - VPX_CS_RESERVED = 6, /**< Reserved */ - VPX_CS_SRGB = 7 /**< sRGB */ - } vpx_color_space_t; /**< alias for enum vpx_color_space */ - - /*!\brief List of supported color range */ - typedef enum vpx_color_range { - VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */ - VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */ - } vpx_color_range_t; /**< alias for enum vpx_color_range */ - - /**\brief Image Descriptor */ - typedef struct vpx_image { - vpx_img_fmt_t fmt; /**< Image Format */ - vpx_color_space_t cs; /**< Color Space */ - vpx_color_range_t range; /**< Color Range */ - - /* Image storage dimensions */ - unsigned int w; /**< Stored image width */ - unsigned int h; /**< Stored image height */ - unsigned int bit_depth; /**< Stored image bit-depth */ - - /* Image display dimensions */ - unsigned int d_w; /**< Displayed image width */ - unsigned int d_h; /**< Displayed image height */ - - /* Image intended rendering dimensions */ - unsigned int r_w; /**< Intended rendering image width */ - unsigned int r_h; /**< Intended rendering image height */ - - /* Chroma subsampling info */ - unsigned int x_chroma_shift; /**< subsampling order, X */ - unsigned int y_chroma_shift; /**< subsampling order, Y */ - - /* Image data pointers. */ -#define VPX_PLANE_PACKED 0 /**< To be used for all packed formats */ -#define VPX_PLANE_Y 0 /**< Y (Luminance) plane */ -#define VPX_PLANE_U 1 /**< U (Chroma) plane */ -#define VPX_PLANE_V 2 /**< V (Chroma) plane */ -#define VPX_PLANE_ALPHA 3 /**< A (Transparency) plane */ - unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */ - int stride[4]; /**< stride between rows for each plane */ - - int bps; /**< bits per sample (for packed formats) */ - - /* The following member may be set by the application to associate data - * with this image. - */ - void *user_priv; /**< may be set by the application to associate data - * with this image. */ - - /* The following members should be treated as private. */ - unsigned char *img_data; /**< private */ - int img_data_owner; /**< private */ - int self_allocd; /**< private */ - - void *fb_priv; /**< Frame buffer data associated with the image. */ - } vpx_image_t; /**< alias for struct vpx_image */ - - /**\brief Representation of a rectangle on a surface */ - typedef struct vpx_image_rect { - unsigned int x; /**< leftmost column */ - unsigned int y; /**< topmost row */ - unsigned int w; /**< width */ - unsigned int h; /**< height */ - } vpx_image_rect_t; /**< alias for struct vpx_image_rect */ - - /*!\brief Open a descriptor, allocating storage for the underlying image - * - * Returns a descriptor for storing an image of the given format. The - * storage for the descriptor is allocated on the heap. - * - * \param[in] img Pointer to storage for descriptor. If this parameter - * is NULL, the storage for the descriptor will be - * allocated on the heap. - * \param[in] fmt Format for the image - * \param[in] d_w Width of the image - * \param[in] d_h Height of the image - * \param[in] align Alignment, in bytes, of the image buffer and - * each row in the image(stride). - * - * \return Returns a pointer to the initialized image descriptor. If the img - * parameter is non-null, the value of the img parameter will be - * returned. - */ - vpx_image_t *vpx_img_alloc(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int align); - - /*!\brief Open a descriptor, using existing storage for the underlying image - * - * Returns a descriptor for storing an image of the given format. The - * storage for descriptor has been allocated elsewhere, and a descriptor is - * desired to "wrap" that storage. - * - * \param[in] img Pointer to storage for descriptor. If this parameter - * is NULL, the storage for the descriptor will be - * allocated on the heap. - * \param[in] fmt Format for the image - * \param[in] d_w Width of the image - * \param[in] d_h Height of the image - * \param[in] align Alignment, in bytes, of each row in the image. - * \param[in] img_data Storage to use for the image - * - * \return Returns a pointer to the initialized image descriptor. If the img - * parameter is non-null, the value of the img parameter will be - * returned. - */ - vpx_image_t *vpx_img_wrap(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int align, - unsigned char *img_data); - - - /*!\brief Set the rectangle identifying the displayed portion of the image - * - * Updates the displayed rectangle (aka viewport) on the image surface to - * match the specified coordinates and size. - * - * \param[in] img Image descriptor - * \param[in] x leftmost column - * \param[in] y topmost row - * \param[in] w width - * \param[in] h height - * - * \return 0 if the requested rectangle is valid, nonzero otherwise. - */ - int vpx_img_set_rect(vpx_image_t *img, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h); - - - /*!\brief Flip the image vertically (top for bottom) - * - * Adjusts the image descriptor's pointers and strides to make the image - * be referenced upside-down. - * - * \param[in] img Image descriptor - */ - void vpx_img_flip(vpx_image_t *img); - - /*!\brief Close an image descriptor - * - * Frees all allocated storage associated with an image descriptor. - * - * \param[in] img Image descriptor - */ - void vpx_img_free(vpx_image_t *img); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VPX_IMAGE_H_ diff --git a/thirdparty/libvpx/vpx/vpx_integer.h b/thirdparty/libvpx/vpx/vpx_integer.h deleted file mode 100644 index 829c9d132c..0000000000 --- a/thirdparty/libvpx/vpx/vpx_integer.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_VPX_INTEGER_H_ -#define VPX_VPX_INTEGER_H_ - -/* get ptrdiff_t, size_t, wchar_t, NULL */ -#include - -#if defined(_MSC_VER) -#define VPX_FORCE_INLINE __forceinline -#define VPX_INLINE __inline -#else -#define VPX_FORCE_INLINE __inline__ __attribute__(always_inline) -// TODO(jbb): Allow a way to force inline off for older compilers. -#define VPX_INLINE inline -#endif - -#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES) -typedef signed char int8_t; -typedef signed short int16_t; -typedef signed int int32_t; - -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; - -#if (defined(_MSC_VER) && (_MSC_VER < 1600)) -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; -#define INT64_MAX _I64_MAX -#define INT32_MAX _I32_MAX -#define INT32_MIN _I32_MIN -#define INT16_MAX _I16_MAX -#define INT16_MIN _I16_MIN -#endif - -#ifndef _UINTPTR_T_DEFINED -typedef size_t uintptr_t; -#endif - -#else - -/* Most platforms have the C99 standard integer types. */ - -#if defined(__cplusplus) -# if !defined(__STDC_FORMAT_MACROS) -# define __STDC_FORMAT_MACROS -# endif -# if !defined(__STDC_LIMIT_MACROS) -# define __STDC_LIMIT_MACROS -# endif -#endif // __cplusplus - -#include - -#endif - -/* VS2010 defines stdint.h, but not inttypes.h */ -#if defined(_MSC_VER) && _MSC_VER < 1800 -#define PRId64 "I64d" -#else -#include -#endif - -#endif // VPX_VPX_INTEGER_H_ diff --git a/thirdparty/libvpx/vpx_config.asm b/thirdparty/libvpx/vpx_config.asm deleted file mode 100644 index 8f2119e7bc..0000000000 --- a/thirdparty/libvpx/vpx_config.asm +++ /dev/null @@ -1,65 +0,0 @@ -%ifdef X86_32 - ARCH_X86 equ 1 - ARCH_X86_64 equ 0 -%elifdef X86_64 - ARCH_X86 equ 0 - ARCH_X86_64 equ 1 -%endif - -HAVE_VPX_PORTS equ 1 -CONFIG_DEPENDENCY_TRACKING equ 0 -CONFIG_EXTERNAL_BUILD equ 0 -CONFIG_INSTALL_DOCS equ 0 -CONFIG_INSTALL_BINS equ 0 -CONFIG_INSTALL_LIBS equ 0 -CONFIG_INSTALL_SRCS equ 0 -CONFIG_USE_X86INC equ 1 -CONFIG_DEBUG equ 0 -CONFIG_GPROF equ 0 -CONFIG_GCOV equ 0 -CONFIG_RVCT equ 0 -CONFIG_PIC equ 1 ;TODO: autodetect -CONFIG_CODEC_SRCS equ 0 -CONFIG_DEBUG_LIBS equ 0 -CONFIG_DEQUANT_TOKENS equ 0 -CONFIG_DC_RECON equ 0 -CONFIG_RUNTIME_CPU_DETECT equ 1 -CONFIG_POSTPROC equ 0 -CONFIG_VP9_POSTPROC equ 0 -CONFIG_MULTITHREAD equ 1 -CONFIG_INTERNAL_STATS equ 0 -CONFIG_VP8_ENCODER equ 0 -CONFIG_VP8_DECODER equ 1 -CONFIG_VP9_ENCODER equ 0 -CONFIG_VP9_DECODER equ 1 -CONFIG_VP8 equ 1 -CONFIG_VP9 equ 1 -CONFIG_ENCODERS equ 0 -CONFIG_DECODERS equ 1 -CONFIG_STATIC_MSVCRT equ 0 -CONFIG_SPATIAL_RESAMPLING equ 0 -CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 -CONFIG_SHARED equ 0 -CONFIG_STATIC equ 0 -CONFIG_SMALL equ 0 -CONFIG_POSTPROC_VISUALIZER equ 0 -CONFIG_OS_SUPPORT equ 1 -CONFIG_UNIT_TESTS equ 0 -CONFIG_WEBM_IO equ 0 -CONFIG_LIBYUV equ 0 -CONFIG_DECODE_PERF_TESTS equ 0 -CONFIG_ENCODE_PERF_TESTS equ 0 -CONFIG_MULTI_RES_ENCODING equ 0 -CONFIG_TEMPORAL_DENOISING equ 1 -CONFIG_VP9_TEMPORAL_DENOISING equ 0 -CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 -CONFIG_VP9_HIGHBITDEPTH equ 0 -CONFIG_BETTER_HW_COMPATIBILITY equ 0 -CONFIG_EXPERIMENTAL equ 0 -CONFIG_SIZE_LIMIT equ 0 -CONFIG_SPATIAL_SVC equ 0 -CONFIG_FP_MB_STATS equ 0 -CONFIG_EMULATE_HARDWARE equ 0 -CONFIG_MISC_FIXES equ 0 diff --git a/thirdparty/libvpx/vpx_config.h b/thirdparty/libvpx/vpx_config.h deleted file mode 100644 index e8e91fa6ef..0000000000 --- a/thirdparty/libvpx/vpx_config.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ -/* */ -/* Use of this source code is governed by a BSD-style license */ -/* that can be found in the LICENSE file in the root of the source */ -/* tree. An additional intellectual property rights grant can be found */ -/* in the file PATENTS. All contributing project authors may */ -/* be found in the AUTHORS file in the root of the source tree. */ -/* This file automatically generated by configure. Do not edit! */ -#ifndef VPX_CONFIG_H -#define VPX_CONFIG_H -#define RESTRICT -#if defined(_MSC_VER) && (_MSC_VER < 1900) - #define INLINE __inline -#else - #define INLINE inline -#endif - -#define HAVE_MIPS32 0 -#define HAVE_MEDIA 0 - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) - #define ARCH_X86 1 - #define ARCH_X86_64 0 - - #define ARCH_ARM 0 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 - - #define HAVE_MMX 1 - #define HAVE_SSE2 1 - #define HAVE_SSSE3 1 - #define HAVE_AVX2 0 -#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) - #define ARCH_X86 0 - #define ARCH_X86_64 1 - - #define ARCH_ARM 0 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 - - #define HAVE_MMX 1 - #define HAVE_SSE2 1 - #define HAVE_SSSE3 1 - #define HAVE_AVX2 0 -#elif defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) - #define ARCH_X86 0 - #define ARCH_X86_64 0 - - #define ARCH_ARM 1 - #define HAVE_NEON 1 - #define HAVE_NEON_ASM 1 -#elif defined(__aarch64__) - #define ARCH_X86 0 - #define ARCH_X86_64 0 - - #define ARCH_ARM 1 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 -#else - #define ARCH_X86 0 - #define ARCH_X86_64 0 - - #define ARCH_ARM 0 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 -#endif - -#define CONFIG_BIG_ENDIAN 0 //TODO: Autodetect - -#ifdef __EMSCRIPTEN__ -#define CONFIG_MULTITHREAD 0 -#else -#define CONFIG_MULTITHREAD 1 -#endif - -#ifdef _WIN32 - #define HAVE_PTHREAD_H 0 - #define HAVE_UNISTD_H 0 -#else - #define HAVE_PTHREAD_H 1 - #define HAVE_UNISTD_H 1 -#endif - -/**/ - -#define HAVE_VPX_PORTS 1 -#define CONFIG_DEPENDENCY_TRACKING 0 -#define CONFIG_EXTERNAL_BUILD 0 -#define CONFIG_INSTALL_DOCS 0 -#define CONFIG_INSTALL_BINS 0 -#define CONFIG_INSTALL_LIBS 0 -#define CONFIG_INSTALL_SRCS 0 -#define CONFIG_DEBUG 0 -#define CONFIG_GPROF 0 -#define CONFIG_GCOV 0 -#define CONFIG_RVCT 0 -#define CONFIG_CODEC_SRCS 0 -#define CONFIG_DEBUG_LIBS 0 -#define CONFIG_DEQUANT_TOKENS 0 -#define CONFIG_DC_RECON 0 -#define CONFIG_RUNTIME_CPU_DETECT 1 -#define CONFIG_POSTPROC 0 -#define CONFIG_VP9_POSTPROC 0 -#define CONFIG_INTERNAL_STATS 0 -#define CONFIG_VP8_ENCODER 0 -#define CONFIG_VP8_DECODER 1 -#define CONFIG_VP9_ENCODER 0 -#define CONFIG_VP9_DECODER 1 -#define CONFIG_VP8 1 -#define CONFIG_VP9 1 -#define CONFIG_ENCODERS 0 -#define CONFIG_DECODERS 1 -#define CONFIG_STATIC_MSVCRT 0 -#define CONFIG_SPATIAL_RESAMPLING 0 -#define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 -#define CONFIG_SHARED 0 -#define CONFIG_STATIC 0 -#define CONFIG_SMALL 0 -#define CONFIG_POSTPROC_VISUALIZER 0 -#define CONFIG_OS_SUPPORT 1 -#define CONFIG_UNIT_TESTS 0 -#define CONFIG_WEBM_IO 0 -#define CONFIG_LIBYUV 0 -#define CONFIG_DECODE_PERF_TESTS 0 -#define CONFIG_ENCODE_PERF_TESTS 0 -#define CONFIG_MULTI_RES_ENCODING 0 -#define CONFIG_TEMPORAL_DENOISING 0 -#define CONFIG_VP9_TEMPORAL_DENOISING 0 -#define CONFIG_COEFFICIENT_RANGE_CHECKING 0 -#define CONFIG_VP9_HIGHBITDEPTH 0 -#define CONFIG_BETTER_HW_COMPATIBILITY 0 -#define CONFIG_EXPERIMENTAL 0 -#define CONFIG_SIZE_LIMIT 0 -#define CONFIG_SPATIAL_SVC 0 -#define CONFIG_FP_MB_STATS 0 -#define CONFIG_EMULATE_HARDWARE 0 -#define CONFIG_MISC_FIXES 0 -#endif /* VPX_CONFIG_H */ diff --git a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm deleted file mode 100644 index b2846c410b..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm +++ /dev/null @@ -1,643 +0,0 @@ -; This file was created from a .asm file -; using the ads2armasm_ms.pl script. -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vpx_v_predictor_4x4_neon| - EXPORT |vpx_v_predictor_8x8_neon| - EXPORT |vpx_v_predictor_16x16_neon| - EXPORT |vpx_v_predictor_32x32_neon| - EXPORT |vpx_h_predictor_4x4_neon| - EXPORT |vpx_h_predictor_8x8_neon| - EXPORT |vpx_h_predictor_16x16_neon| - EXPORT |vpx_h_predictor_32x32_neon| - EXPORT |vpx_tm_predictor_4x4_neon| - EXPORT |vpx_tm_predictor_8x8_neon| - EXPORT |vpx_tm_predictor_16x16_neon| - EXPORT |vpx_tm_predictor_32x32_neon| - - - - AREA |.text|, CODE, READONLY, ALIGN=2 - -;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_4x4_neon| PROC - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - ENDP ; |vpx_v_predictor_4x4_neon| - ALIGN 4 - -;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_8x8_neon| PROC - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - ENDP ; |vpx_v_predictor_8x8_neon| - ALIGN 4 - -;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_16x16_neon| PROC - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - ENDP ; |vpx_v_predictor_16x16_neon| - ALIGN 4 - -;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_32x32_neon| PROC - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - ENDP ; |vpx_v_predictor_32x32_neon| - ALIGN 4 - -;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_4x4_neon| PROC - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx lr - ENDP ; |vpx_h_predictor_4x4_neon| - ALIGN 4 - -;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_8x8_neon| PROC - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - ENDP ; |vpx_h_predictor_8x8_neon| - ALIGN 4 - -;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_16x16_neon| PROC - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - ENDP ; |vpx_h_predictor_16x16_neon| - ALIGN 4 - -;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_32x32_neon| PROC - sub r1, r1, #16 - mov r2, #2 -loop_h - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - ENDP ; |vpx_h_predictor_32x32_neon| - ALIGN 4 - -;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_4x4_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.u8 {d0[]}, [r12] - - ; Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - ; Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - ; Load left row by row and compute left + (above - ytop_left) - ; 1st row and 2nd row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3]! - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - ; 3rd row and 4th row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3] - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - ENDP ; |vpx_tm_predictor_4x4_neon| - ALIGN 4 - -;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_8x8_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - ; preload 8 left - vld1.8 {d30}, [r3] - - ; Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - ; Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - ; Load left row by row and compute left + (above - ytop_left) - ; 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - ; 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - ; 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - ; 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - ENDP ; |vpx_tm_predictor_8x8_neon| - ALIGN 4 - -;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_16x16_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - ; Load above 8 pixels - vld1.8 {q1}, [r2] - - ; preload 8 left into r12 - vld1.8 {d18}, [r3]! - - ; Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d0 - - vmovl.u8 q10, d18 - - ; Load left row by row and compute left + (above - ytop_left) - ; Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon - ; Process two rows. - vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] ; proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - ; Process two rows. - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] ; proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] ; proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! ; preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - ENDP ; |vpx_tm_predictor_16x16_neon| - ALIGN 4 - -;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_32x32_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - ; Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - ; preload 8 left pixels - vld1.8 {d26}, [r3]! - - ; Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d0 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d0 - - vmovl.u8 q3, d26 - - ; Load left row by row and compute left + (above - ytop_left) - ; Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon - ; Process two rows. - vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! ; preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - ENDP ; |vpx_tm_predictor_32x32_neon| - ALIGN 4 - - END diff --git a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm deleted file mode 100644 index 9c3736faf8..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm +++ /dev/null @@ -1,641 +0,0 @@ -; This file was created from a .asm file -; using the ads2armasm_ms.pl script. -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vpx_lpf_horizontal_edge_8_neon| - EXPORT |vpx_lpf_horizontal_edge_16_neon| - EXPORT |vpx_lpf_vertical_16_neon| - - AREA |.text|, CODE, READONLY, ALIGN=2 - -; void mb_lpf_horizontal_edge(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh, -; int count) -; r0 uint8_t *s, -; r1 int p, /* pitch */ -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh, -; r12 int count -|mb_lpf_horizontal_edge| PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] ; load thresh - -h_count - vld1.8 {d16[]}, [r2] ; load *blimit - vld1.8 {d17[]}, [r3] ; load *limit - vld1.8 {d18[]}, [r4] ; load *thresh - - sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines - - vld1.u8 {d0}, [r8@64], r1 ; p7 - vld1.u8 {d1}, [r8@64], r1 ; p6 - vld1.u8 {d2}, [r8@64], r1 ; p5 - vld1.u8 {d3}, [r8@64], r1 ; p4 - vld1.u8 {d4}, [r8@64], r1 ; p3 - vld1.u8 {d5}, [r8@64], r1 ; p2 - vld1.u8 {d6}, [r8@64], r1 ; p1 - vld1.u8 {d7}, [r8@64], r1 ; p0 - vld1.u8 {d8}, [r8@64], r1 ; q0 - vld1.u8 {d9}, [r8@64], r1 ; q1 - vld1.u8 {d10}, [r8@64], r1 ; q2 - vld1.u8 {d11}, [r8@64], r1 ; q3 - vld1.u8 {d12}, [r8@64], r1 ; q4 - vld1.u8 {d13}, [r8@64], r1 ; q5 - vld1.u8 {d14}, [r8@64], r1 ; q6 - vld1.u8 {d15}, [r8@64], r1 ; q7 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq h_mbfilter - - ; flat && mask were not set for any of the channels. Just store the values - ; from filter. - sub r8, r0, r1, lsl #1 - - vst1.u8 {d25}, [r8@64], r1 ; store op1 - vst1.u8 {d24}, [r8@64], r1 ; store op0 - vst1.u8 {d23}, [r8@64], r1 ; store oq0 - vst1.u8 {d26}, [r8@64], r1 ; store oq1 - - b h_next - -h_mbfilter - tst r7, #2 - beq h_wide_mbfilter - - ; flat2 was not set for any of the channels. Just store the values from - ; mbfilter. - sub r8, r0, r1, lsl #1 - sub r8, r8, r1 - - vst1.u8 {d18}, [r8@64], r1 ; store op2 - vst1.u8 {d19}, [r8@64], r1 ; store op1 - vst1.u8 {d20}, [r8@64], r1 ; store op0 - vst1.u8 {d21}, [r8@64], r1 ; store oq0 - vst1.u8 {d22}, [r8@64], r1 ; store oq1 - vst1.u8 {d23}, [r8@64], r1 ; store oq2 - - b h_next - -h_wide_mbfilter - sub r8, r0, r1, lsl #3 - add r8, r8, r1 - - vst1.u8 {d16}, [r8@64], r1 ; store op6 - vst1.u8 {d24}, [r8@64], r1 ; store op5 - vst1.u8 {d25}, [r8@64], r1 ; store op4 - vst1.u8 {d26}, [r8@64], r1 ; store op3 - vst1.u8 {d27}, [r8@64], r1 ; store op2 - vst1.u8 {d18}, [r8@64], r1 ; store op1 - vst1.u8 {d19}, [r8@64], r1 ; store op0 - vst1.u8 {d20}, [r8@64], r1 ; store oq0 - vst1.u8 {d21}, [r8@64], r1 ; store oq1 - vst1.u8 {d22}, [r8@64], r1 ; store oq2 - vst1.u8 {d23}, [r8@64], r1 ; store oq3 - vst1.u8 {d1}, [r8@64], r1 ; store oq4 - vst1.u8 {d2}, [r8@64], r1 ; store oq5 - vst1.u8 {d3}, [r8@64], r1 ; store oq6 - -h_next - add r0, r0, #8 - subs r12, r12, #1 - bne h_count - - vpop {d8-d15} - pop {r4-r8, pc} - - ENDP ; |mb_lpf_horizontal_edge| - ALIGN 4 - -; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) -; r0 uint8_t *s, -; r1 int pitch, -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh -|vpx_lpf_horizontal_edge_8_neon| PROC - mov r12, #1 - b mb_lpf_horizontal_edge - ENDP ; |vpx_lpf_horizontal_edge_8_neon| - ALIGN 4 - -; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) -; r0 uint8_t *s, -; r1 int pitch, -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh -|vpx_lpf_horizontal_edge_16_neon| PROC - mov r12, #2 - b mb_lpf_horizontal_edge - ENDP ; |vpx_lpf_horizontal_edge_16_neon| - ALIGN 4 - -; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) -; r0 uint8_t *s, -; r1 int p, /* pitch */ -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh, -|vpx_lpf_vertical_16_neon| PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] ; load thresh - - vld1.8 {d16[]}, [r2] ; load *blimit - vld1.8 {d17[]}, [r3] ; load *limit - vld1.8 {d18[]}, [r4] ; load *thresh - - sub r8, r0, #8 - - vld1.8 {d0}, [r8@64], r1 - vld1.8 {d8}, [r0@64], r1 - vld1.8 {d1}, [r8@64], r1 - vld1.8 {d9}, [r0@64], r1 - vld1.8 {d2}, [r8@64], r1 - vld1.8 {d10}, [r0@64], r1 - vld1.8 {d3}, [r8@64], r1 - vld1.8 {d11}, [r0@64], r1 - vld1.8 {d4}, [r8@64], r1 - vld1.8 {d12}, [r0@64], r1 - vld1.8 {d5}, [r8@64], r1 - vld1.8 {d13}, [r0@64], r1 - vld1.8 {d6}, [r8@64], r1 - vld1.8 {d14}, [r0@64], r1 - vld1.8 {d7}, [r8@64], r1 - vld1.8 {d15}, [r0@64], r1 - - sub r0, r0, r1, lsl #3 - - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - vtrn.8 d4, d5 - vtrn.8 d6, d7 - - vtrn.8 d8, d9 - vtrn.8 d10, d11 - vtrn.8 d12, d13 - vtrn.8 d14, d15 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq v_mbfilter - - ; flat && mask were not set for any of the channels. Just store the values - ; from filter. - sub r8, r0, #2 - - vswp d23, d25 - - vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 - vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 - vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 - vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 - vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 - vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 - vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 - vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 - - b v_end - -v_mbfilter - tst r7, #2 - beq v_wide_mbfilter - - ; flat2 was not set for any of the channels. Just store the values from - ; mbfilter. - sub r8, r0, #3 - - vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 - vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 - vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 - vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 - vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 - vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 - vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 - vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 - vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 - vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 - vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 - vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 - vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 - vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 - vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 - vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 - - b v_end - -v_wide_mbfilter - sub r8, r0, #8 - - vtrn.32 d0, d26 - vtrn.32 d16, d27 - vtrn.32 d24, d18 - vtrn.32 d25, d19 - - vtrn.16 d0, d24 - vtrn.16 d16, d25 - vtrn.16 d26, d18 - vtrn.16 d27, d19 - - vtrn.8 d0, d16 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - vtrn.8 d18, d19 - - vtrn.32 d20, d1 - vtrn.32 d21, d2 - vtrn.32 d22, d3 - vtrn.32 d23, d15 - - vtrn.16 d20, d22 - vtrn.16 d21, d23 - vtrn.16 d1, d3 - vtrn.16 d2, d15 - - vtrn.8 d20, d21 - vtrn.8 d22, d23 - vtrn.8 d1, d2 - vtrn.8 d3, d15 - - vst1.8 {d0}, [r8@64], r1 - vst1.8 {d20}, [r0@64], r1 - vst1.8 {d16}, [r8@64], r1 - vst1.8 {d21}, [r0@64], r1 - vst1.8 {d24}, [r8@64], r1 - vst1.8 {d22}, [r0@64], r1 - vst1.8 {d25}, [r8@64], r1 - vst1.8 {d23}, [r0@64], r1 - vst1.8 {d26}, [r8@64], r1 - vst1.8 {d1}, [r0@64], r1 - vst1.8 {d27}, [r8@64], r1 - vst1.8 {d2}, [r0@64], r1 - vst1.8 {d18}, [r8@64], r1 - vst1.8 {d3}, [r0@64], r1 - vst1.8 {d19}, [r8@64], r1 - vst1.8 {d15}, [r0@64], r1 - -v_end - vpop {d8-d15} - pop {r4-r8, pc} - - ENDP ; |vpx_lpf_vertical_16_neon| - ALIGN 4 - -; void vpx_wide_mbfilter_neon(); -; This is a helper function for the loopfilters. The invidual functions do the -; necessary load, transpose (if necessary) and store. -; -; r0-r3 PRESERVE -; d16 blimit -; d17 limit -; d18 thresh -; d0 p7 -; d1 p6 -; d2 p5 -; d3 p4 -; d4 p3 -; d5 p2 -; d6 p1 -; d7 p0 -; d8 q0 -; d9 q1 -; d10 q2 -; d11 q3 -; d12 q4 -; d13 q5 -; d14 q6 -; d15 q7 -|vpx_wide_mbfilter_neon| PROC - mov r7, #0 - - ; filter_mask - vabd.u8 d19, d4, d5 ; abs(p3 - p2) - vabd.u8 d20, d5, d6 ; abs(p2 - p1) - vabd.u8 d21, d6, d7 ; abs(p1 - p0) - vabd.u8 d22, d9, d8 ; abs(q1 - q0) - vabd.u8 d23, d10, d9 ; abs(q2 - q1) - vabd.u8 d24, d11, d10 ; abs(q3 - q2) - - ; only compare the largest value to limit - vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) - vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) - vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) - vmax.u8 d19, d19, d20 - - vabd.u8 d24, d7, d8 ; abs(p0 - q0) - - vmax.u8 d19, d19, d23 - - vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) - vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 - - ; abs () > limit - vcge.u8 d19, d17, d19 - - ; flatmask4 - vabd.u8 d25, d7, d5 ; abs(p0 - p2) - vabd.u8 d26, d8, d10 ; abs(q0 - q2) - vabd.u8 d27, d4, d7 ; abs(p3 - p0) - vabd.u8 d28, d11, d8 ; abs(q3 - q0) - - ; only compare the largest value to thresh - vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) - vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) - vmax.u8 d25, d25, d26 - vmax.u8 d20, d20, d25 - - vshr.u8 d23, d23, #1 ; a = a / 2 - vqadd.u8 d24, d24, d23 ; a = b + a - - vmov.u8 d30, #1 - vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 - - vcge.u8 d20, d30, d20 ; flat - - vand d19, d19, d24 ; mask - - ; hevmask - vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 - vorr d21, d21, d22 ; hev - - vand d16, d20, d19 ; flat && mask - vmov r5, r6, d16 - - ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) - vabd.u8 d22, d3, d7 ; abs(p4 - p0) - vabd.u8 d23, d12, d8 ; abs(q4 - q0) - vabd.u8 d24, d7, d2 ; abs(p0 - p5) - vabd.u8 d25, d8, d13 ; abs(q0 - q5) - vabd.u8 d26, d1, d7 ; abs(p6 - p0) - vabd.u8 d27, d14, d8 ; abs(q6 - q0) - vabd.u8 d28, d0, d7 ; abs(p7 - p0) - vabd.u8 d29, d15, d8 ; abs(q7 - q0) - - ; only compare the largest value to thresh - vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) - vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) - vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) - vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) - - vmax.u8 d26, d22, d23 - vmax.u8 d27, d24, d25 - vmax.u8 d23, d26, d27 - - vcge.u8 d18, d30, d23 ; flat2 - - vmov.u8 d22, #0x80 - - orrs r5, r5, r6 ; Check for 0 - orreq r7, r7, #1 ; Only do filter branch - - vand d17, d18, d16 ; flat2 && flat && mask - vmov r5, r6, d17 - - ; mbfilter() function - - ; filter() function - ; convert to signed - veor d23, d8, d22 ; qs0 - veor d24, d7, d22 ; ps0 - veor d25, d6, d22 ; ps1 - veor d26, d9, d22 ; qs1 - - vmov.u8 d27, #3 - - vsub.s8 d28, d23, d24 ; ( qs0 - ps0) - vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) - vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) - vand d29, d29, d21 ; filter &= hev - vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) - vmov.u8 d29, #4 - - ; filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d28, q15 - - vand d28, d28, d19 ; filter &= mask - - vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) - vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) - vshr.s8 d30, d30, #3 ; filter2 >>= 3 - vshr.s8 d29, d29, #3 ; filter1 >>= 3 - - - vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) - vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1) - - ; outer tap adjustments: ++filter1 >> 1 - vrshr.s8 d29, d29, #1 - vbic d29, d29, d21 ; filter &= ~hev - - vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) - vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) - - veor d24, d24, d22 ; *f_op0 = u^0x80 - veor d23, d23, d22 ; *f_oq0 = u^0x80 - veor d25, d25, d22 ; *f_op1 = u^0x80 - veor d26, d26, d22 ; *f_oq1 = u^0x80 - - tst r7, #1 - bxne lr - - orrs r5, r5, r6 ; Check for 0 - orreq r7, r7, #2 ; Only do mbfilter branch - - ; mbfilter flat && mask branch - ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's - ; and using vibt on the q's? - vmov.u8 d29, #2 - vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 - vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 - vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 - vaddl.u8 q10, d4, d5 - vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 - vaddl.u8 q14, d6, d9 - vqrshrn.u16 d18, q15, #3 ; r_op2 - - vsub.i16 q15, q10 - vaddl.u8 q10, d4, d6 - vadd.i16 q15, q14 - vaddl.u8 q14, d7, d10 - vqrshrn.u16 d19, q15, #3 ; r_op1 - - vsub.i16 q15, q10 - vadd.i16 q15, q14 - vaddl.u8 q14, d8, d11 - vqrshrn.u16 d20, q15, #3 ; r_op0 - - vsubw.u8 q15, d4 ; oq0 = op0 - p3 - vsubw.u8 q15, d7 ; oq0 -= p0 - vadd.i16 q15, q14 - vaddl.u8 q14, d9, d11 - vqrshrn.u16 d21, q15, #3 ; r_oq0 - - vsubw.u8 q15, d5 ; oq1 = oq0 - p2 - vsubw.u8 q15, d8 ; oq1 -= q0 - vadd.i16 q15, q14 - vaddl.u8 q14, d10, d11 - vqrshrn.u16 d22, q15, #3 ; r_oq1 - - vsubw.u8 q15, d6 ; oq2 = oq0 - p1 - vsubw.u8 q15, d9 ; oq2 -= q1 - vadd.i16 q15, q14 - vqrshrn.u16 d27, q15, #3 ; r_oq2 - - ; Filter does not set op2 or oq2, so use p2 and q2. - vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) - vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) - vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) - vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) - vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) - - vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) - vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) - - tst r7, #2 - bxne lr - - ; wide_mbfilter flat2 && flat && mask branch - vmov.u8 d16, #7 - vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 - vaddl.u8 q12, d2, d3 - vaddl.u8 q13, d4, d5 - vaddl.u8 q14, d1, d6 - vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 - vadd.i16 q12, q13 - vadd.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vadd.i16 q15, q12 - vaddl.u8 q12, d0, d1 - vaddw.u8 q15, d1 - vaddl.u8 q13, d0, d2 - vadd.i16 q14, q15, q14 - vqrshrn.u16 d16, q15, #4 ; w_op6 - - vsub.i16 q15, q14, q12 - vaddl.u8 q14, d3, d10 - vqrshrn.u16 d24, q15, #4 ; w_op5 - - vsub.i16 q15, q13 - vaddl.u8 q13, d0, d3 - vadd.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vqrshrn.u16 d25, q15, #4 ; w_op4 - - vadd.i16 q15, q14 - vaddl.u8 q14, d0, d4 - vsub.i16 q15, q13 - vsub.i16 q14, q15, q14 - vqrshrn.u16 d26, q15, #4 ; w_op3 - - vaddw.u8 q15, q14, d5 ; op2 += p2 - vaddl.u8 q14, d0, d5 - vaddw.u8 q15, d12 ; op2 += q4 - vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) - vqrshrn.u16 d27, q15, #4 ; w_op2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d6 - vaddw.u8 q15, d6 ; op1 += p1 - vaddw.u8 q15, d13 ; op1 += q5 - vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) - vqrshrn.u16 d18, q15, #4 ; w_op1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d7 - vaddw.u8 q15, d7 ; op0 += p0 - vaddw.u8 q15, d14 ; op0 += q6 - vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) - vqrshrn.u16 d19, q15, #4 ; w_op0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d1, d8 - vaddw.u8 q15, d8 ; oq0 += q0 - vaddw.u8 q15, d15 ; oq0 += q7 - vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) - vqrshrn.u16 d20, q15, #4 ; w_oq0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vaddw.u8 q15, d9 ; oq1 += q1 - vaddl.u8 q4, d10, d15 - vaddw.u8 q15, d15 ; oq1 += q7 - vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) - vqrshrn.u16 d21, q15, #4 ; w_oq1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d3, d10 - vadd.i16 q15, q4 - vaddl.u8 q4, d11, d15 - vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) - vqrshrn.u16 d22, q15, #4 ; w_oq2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vadd.i16 q15, q4 - vaddl.u8 q4, d12, d15 - vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) - vqrshrn.u16 d23, q15, #4 ; w_oq3 - - vsub.i16 q15, q14 - vaddl.u8 q14, d5, d12 - vadd.i16 q15, q4 - vaddl.u8 q4, d13, d15 - vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) - vqrshrn.u16 d1, q15, #4 ; w_oq4 - - vsub.i16 q15, q14 - vaddl.u8 q14, d6, d13 - vadd.i16 q15, q4 - vaddl.u8 q4, d14, d15 - vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) - vqrshrn.u16 d2, q15, #4 ; w_oq5 - - vsub.i16 q15, q14 - vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) - vadd.i16 q15, q4 - vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) - vqrshrn.u16 d3, q15, #4 ; w_oq6 - vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) - vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) - vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) - - bx lr - ENDP ; |vpx_wide_mbfilter_neon| - ALIGN 4 - - END diff --git a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm deleted file mode 100644 index 4cf9988e65..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm +++ /dev/null @@ -1,39 +0,0 @@ -; This file was created from a .asm file -; using the ads2armasm_ms.pl script. -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vpx_push_neon| - EXPORT |vpx_pop_neon| - - - - - AREA |.text|, CODE, READONLY, ALIGN=2 - -|vpx_push_neon| PROC - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - ALIGN 4 - -|vpx_pop_neon| PROC - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - ALIGN 4 - - END - diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s b/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s deleted file mode 100644 index 3932227fc5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s +++ /dev/null @@ -1,658 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 -@ -@ Copyright (c) 2014 The WebM project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. -@ - - .global vpx_v_predictor_4x4_neon - .type vpx_v_predictor_4x4_neon, function - .global vpx_v_predictor_8x8_neon - .type vpx_v_predictor_8x8_neon, function - .global vpx_v_predictor_16x16_neon - .type vpx_v_predictor_16x16_neon, function - .global vpx_v_predictor_32x32_neon - .type vpx_v_predictor_32x32_neon, function - .global vpx_h_predictor_4x4_neon - .type vpx_h_predictor_4x4_neon, function - .global vpx_h_predictor_8x8_neon - .type vpx_h_predictor_8x8_neon, function - .global vpx_h_predictor_16x16_neon - .type vpx_h_predictor_16x16_neon, function - .global vpx_h_predictor_32x32_neon - .type vpx_h_predictor_32x32_neon, function - .global vpx_tm_predictor_4x4_neon - .type vpx_tm_predictor_4x4_neon, function - .global vpx_tm_predictor_8x8_neon - .type vpx_tm_predictor_8x8_neon, function - .global vpx_tm_predictor_16x16_neon - .type vpx_tm_predictor_16x16_neon, function - .global vpx_tm_predictor_32x32_neon - .type vpx_tm_predictor_32x32_neon, function - .arm - .eabi_attribute 24, 1 @Tag_ABI_align_needed - .eabi_attribute 25, 1 @Tag_ABI_align_preserved - -.text -.p2align 2 - -@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_4x4_neon: - vpx_v_predictor_4x4_neon: @ PROC - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - .size vpx_v_predictor_4x4_neon, .-vpx_v_predictor_4x4_neon @ ENDP @ |vpx_v_predictor_4x4_neon| - -@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_8x8_neon: - vpx_v_predictor_8x8_neon: @ PROC - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - .size vpx_v_predictor_8x8_neon, .-vpx_v_predictor_8x8_neon @ ENDP @ |vpx_v_predictor_8x8_neon| - -@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_16x16_neon: - vpx_v_predictor_16x16_neon: @ PROC - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - .size vpx_v_predictor_16x16_neon, .-vpx_v_predictor_16x16_neon @ ENDP @ |vpx_v_predictor_16x16_neon| - -@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_32x32_neon: - vpx_v_predictor_32x32_neon: @ PROC - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v: - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - .size vpx_v_predictor_32x32_neon, .-vpx_v_predictor_32x32_neon @ ENDP @ |vpx_v_predictor_32x32_neon| - -@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_4x4_neon: - vpx_h_predictor_4x4_neon: @ PROC - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx lr - .size vpx_h_predictor_4x4_neon, .-vpx_h_predictor_4x4_neon @ ENDP @ |vpx_h_predictor_4x4_neon| - -@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_8x8_neon: - vpx_h_predictor_8x8_neon: @ PROC - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - .size vpx_h_predictor_8x8_neon, .-vpx_h_predictor_8x8_neon @ ENDP @ |vpx_h_predictor_8x8_neon| - -@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_16x16_neon: - vpx_h_predictor_16x16_neon: @ PROC - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - .size vpx_h_predictor_16x16_neon, .-vpx_h_predictor_16x16_neon @ ENDP @ |vpx_h_predictor_16x16_neon| - -@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_32x32_neon: - vpx_h_predictor_32x32_neon: @ PROC - sub r1, r1, #16 - mov r2, #2 -loop_h: - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - .size vpx_h_predictor_32x32_neon, .-vpx_h_predictor_32x32_neon @ ENDP @ |vpx_h_predictor_32x32_neon| - -@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_4x4_neon: - vpx_tm_predictor_4x4_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.u8 {d0[]}, [r12] - - @ Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3]! - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - @ 3rd row and 4th row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3] - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - .size vpx_tm_predictor_4x4_neon, .-vpx_tm_predictor_4x4_neon @ ENDP @ |vpx_tm_predictor_4x4_neon| - -@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_8x8_neon: - vpx_tm_predictor_8x8_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ preload 8 left - vld1.8 {d30}, [r3] - - @ Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - @ 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - .size vpx_tm_predictor_8x8_neon, .-vpx_tm_predictor_8x8_neon @ ENDP @ |vpx_tm_predictor_8x8_neon| - -@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_16x16_neon: - vpx_tm_predictor_16x16_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 8 pixels - vld1.8 {q1}, [r2] - - @ preload 8 left into r12 - vld1.8 {d18}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d0 - - vmovl.u8 q10, d18 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon: - @ Process two rows. - vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] @ proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - @ Process two rows. - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] @ proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] @ proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! @ preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - .size vpx_tm_predictor_16x16_neon, .-vpx_tm_predictor_16x16_neon @ ENDP @ |vpx_tm_predictor_16x16_neon| - -@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_32x32_neon: - vpx_tm_predictor_32x32_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - @ preload 8 left pixels - vld1.8 {d26}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d0 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d0 - - vmovl.u8 q3, d26 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon: - @ Process two rows. - vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! @ preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - .size vpx_tm_predictor_32x32_neon, .-vpx_tm_predictor_32x32_neon @ ENDP @ |vpx_tm_predictor_32x32_neon| - - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s deleted file mode 100644 index f6b05406fb..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s +++ /dev/null @@ -1,647 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 -@ -@ Copyright (c) 2013 The WebM project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. -@ - - .global vpx_lpf_horizontal_edge_8_neon - .type vpx_lpf_horizontal_edge_8_neon, function - .global vpx_lpf_horizontal_edge_16_neon - .type vpx_lpf_horizontal_edge_16_neon, function - .global vpx_lpf_vertical_16_neon - .type vpx_lpf_vertical_16_neon, function - .arm - -.text -.p2align 2 - -@ void mb_lpf_horizontal_edge(uint8_t *s, int p, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh, -@ int count) -@ r0 uint8_t *s, -@ r1 int p, /* pitch */ -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh, -@ r12 int count -_mb_lpf_horizontal_edge: - mb_lpf_horizontal_edge: @ PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - -h_count: - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, r1, lsl #3 @ move src pointer down by 8 lines - - vld1.u8 {d0}, [r8,:64], r1 @ p7 - vld1.u8 {d1}, [r8,:64], r1 @ p6 - vld1.u8 {d2}, [r8,:64], r1 @ p5 - vld1.u8 {d3}, [r8,:64], r1 @ p4 - vld1.u8 {d4}, [r8,:64], r1 @ p3 - vld1.u8 {d5}, [r8,:64], r1 @ p2 - vld1.u8 {d6}, [r8,:64], r1 @ p1 - vld1.u8 {d7}, [r8,:64], r1 @ p0 - vld1.u8 {d8}, [r8,:64], r1 @ q0 - vld1.u8 {d9}, [r8,:64], r1 @ q1 - vld1.u8 {d10}, [r8,:64], r1 @ q2 - vld1.u8 {d11}, [r8,:64], r1 @ q3 - vld1.u8 {d12}, [r8,:64], r1 @ q4 - vld1.u8 {d13}, [r8,:64], r1 @ q5 - vld1.u8 {d14}, [r8,:64], r1 @ q6 - vld1.u8 {d15}, [r8,:64], r1 @ q7 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq h_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. - sub r8, r0, r1, lsl #1 - - vst1.u8 {d25}, [r8,:64], r1 @ store op1 - vst1.u8 {d24}, [r8,:64], r1 @ store op0 - vst1.u8 {d23}, [r8,:64], r1 @ store oq0 - vst1.u8 {d26}, [r8,:64], r1 @ store oq1 - - b h_next - -h_mbfilter: - tst r7, #2 - beq h_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, r1, lsl #1 - sub r8, r8, r1 - - vst1.u8 {d18}, [r8,:64], r1 @ store op2 - vst1.u8 {d19}, [r8,:64], r1 @ store op1 - vst1.u8 {d20}, [r8,:64], r1 @ store op0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq0 - vst1.u8 {d22}, [r8,:64], r1 @ store oq1 - vst1.u8 {d23}, [r8,:64], r1 @ store oq2 - - b h_next - -h_wide_mbfilter: - sub r8, r0, r1, lsl #3 - add r8, r8, r1 - - vst1.u8 {d16}, [r8,:64], r1 @ store op6 - vst1.u8 {d24}, [r8,:64], r1 @ store op5 - vst1.u8 {d25}, [r8,:64], r1 @ store op4 - vst1.u8 {d26}, [r8,:64], r1 @ store op3 - vst1.u8 {d27}, [r8,:64], r1 @ store op2 - vst1.u8 {d18}, [r8,:64], r1 @ store op1 - vst1.u8 {d19}, [r8,:64], r1 @ store op0 - vst1.u8 {d20}, [r8,:64], r1 @ store oq0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq1 - vst1.u8 {d22}, [r8,:64], r1 @ store oq2 - vst1.u8 {d23}, [r8,:64], r1 @ store oq3 - vst1.u8 {d1}, [r8,:64], r1 @ store oq4 - vst1.u8 {d2}, [r8,:64], r1 @ store oq5 - vst1.u8 {d3}, [r8,:64], r1 @ store oq6 - -h_next: - add r0, r0, #8 - subs r12, r12, #1 - bne h_count - - vpop {d8-d15} - pop {r4-r8, pc} - - .size mb_lpf_horizontal_edge, .-mb_lpf_horizontal_edge @ ENDP @ |mb_lpf_horizontal_edge| - -@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh) -@ r0 uint8_t *s, -@ r1 int pitch, -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_8_neon: - vpx_lpf_horizontal_edge_8_neon: @ PROC - mov r12, #1 - b mb_lpf_horizontal_edge - .size vpx_lpf_horizontal_edge_8_neon, .-vpx_lpf_horizontal_edge_8_neon @ ENDP @ |vpx_lpf_horizontal_edge_8_neon| - -@ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh) -@ r0 uint8_t *s, -@ r1 int pitch, -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_16_neon: - vpx_lpf_horizontal_edge_16_neon: @ PROC - mov r12, #2 - b mb_lpf_horizontal_edge - .size vpx_lpf_horizontal_edge_16_neon, .-vpx_lpf_horizontal_edge_16_neon @ ENDP @ |vpx_lpf_horizontal_edge_16_neon| - -@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh) -@ r0 uint8_t *s, -@ r1 int p, /* pitch */ -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh, -_vpx_lpf_vertical_16_neon: - vpx_lpf_vertical_16_neon: @ PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, #8 - - vld1.8 {d0}, [r8,:64], r1 - vld1.8 {d8}, [r0,:64], r1 - vld1.8 {d1}, [r8,:64], r1 - vld1.8 {d9}, [r0,:64], r1 - vld1.8 {d2}, [r8,:64], r1 - vld1.8 {d10}, [r0,:64], r1 - vld1.8 {d3}, [r8,:64], r1 - vld1.8 {d11}, [r0,:64], r1 - vld1.8 {d4}, [r8,:64], r1 - vld1.8 {d12}, [r0,:64], r1 - vld1.8 {d5}, [r8,:64], r1 - vld1.8 {d13}, [r0,:64], r1 - vld1.8 {d6}, [r8,:64], r1 - vld1.8 {d14}, [r0,:64], r1 - vld1.8 {d7}, [r8,:64], r1 - vld1.8 {d15}, [r0,:64], r1 - - sub r0, r0, r1, lsl #3 - - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - vtrn.8 d4, d5 - vtrn.8 d6, d7 - - vtrn.8 d8, d9 - vtrn.8 d10, d11 - vtrn.8 d12, d13 - vtrn.8 d14, d15 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq v_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. - sub r8, r0, #2 - - vswp d23, d25 - - vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 - vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 - vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 - vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 - vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 - vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 - vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 - vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 - - b v_end - -v_mbfilter: - tst r7, #2 - beq v_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, #3 - - vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 - vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 - vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 - vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 - vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 - vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 - vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 - vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 - vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 - vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 - vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 - vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 - vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 - vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 - vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 - vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 - - b v_end - -v_wide_mbfilter: - sub r8, r0, #8 - - vtrn.32 d0, d26 - vtrn.32 d16, d27 - vtrn.32 d24, d18 - vtrn.32 d25, d19 - - vtrn.16 d0, d24 - vtrn.16 d16, d25 - vtrn.16 d26, d18 - vtrn.16 d27, d19 - - vtrn.8 d0, d16 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - vtrn.8 d18, d19 - - vtrn.32 d20, d1 - vtrn.32 d21, d2 - vtrn.32 d22, d3 - vtrn.32 d23, d15 - - vtrn.16 d20, d22 - vtrn.16 d21, d23 - vtrn.16 d1, d3 - vtrn.16 d2, d15 - - vtrn.8 d20, d21 - vtrn.8 d22, d23 - vtrn.8 d1, d2 - vtrn.8 d3, d15 - - vst1.8 {d0}, [r8,:64], r1 - vst1.8 {d20}, [r0,:64], r1 - vst1.8 {d16}, [r8,:64], r1 - vst1.8 {d21}, [r0,:64], r1 - vst1.8 {d24}, [r8,:64], r1 - vst1.8 {d22}, [r0,:64], r1 - vst1.8 {d25}, [r8,:64], r1 - vst1.8 {d23}, [r0,:64], r1 - vst1.8 {d26}, [r8,:64], r1 - vst1.8 {d1}, [r0,:64], r1 - vst1.8 {d27}, [r8,:64], r1 - vst1.8 {d2}, [r0,:64], r1 - vst1.8 {d18}, [r8,:64], r1 - vst1.8 {d3}, [r0,:64], r1 - vst1.8 {d19}, [r8,:64], r1 - vst1.8 {d15}, [r0,:64], r1 - -v_end: - vpop {d8-d15} - pop {r4-r8, pc} - - .size vpx_lpf_vertical_16_neon, .-vpx_lpf_vertical_16_neon @ ENDP @ |vpx_lpf_vertical_16_neon| - -@ void vpx_wide_mbfilter_neon(); -@ This is a helper function for the loopfilters. The invidual functions do the -@ necessary load, transpose (if necessary) and store. -@ -@ r0-r3 PRESERVE -@ d16 blimit -@ d17 limit -@ d18 thresh -@ d0 p7 -@ d1 p6 -@ d2 p5 -@ d3 p4 -@ d4 p3 -@ d5 p2 -@ d6 p1 -@ d7 p0 -@ d8 q0 -@ d9 q1 -@ d10 q2 -@ d11 q3 -@ d12 q4 -@ d13 q5 -@ d14 q6 -@ d15 q7 -_vpx_wide_mbfilter_neon: - vpx_wide_mbfilter_neon: @ PROC - mov r7, #0 - - @ filter_mask - vabd.u8 d19, d4, d5 @ abs(p3 - p2) - vabd.u8 d20, d5, d6 @ abs(p2 - p1) - vabd.u8 d21, d6, d7 @ abs(p1 - p0) - vabd.u8 d22, d9, d8 @ abs(q1 - q0) - vabd.u8 d23, d10, d9 @ abs(q2 - q1) - vabd.u8 d24, d11, d10 @ abs(q3 - q2) - - @ only compare the largest value to limit - vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1)) - vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0)) - vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2)) - vmax.u8 d19, d19, d20 - - vabd.u8 d24, d7, d8 @ abs(p0 - q0) - - vmax.u8 d19, d19, d23 - - vabd.u8 d23, d6, d9 @ a = abs(p1 - q1) - vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2 - - @ abs () > limit - vcge.u8 d19, d17, d19 - - @ flatmask4 - vabd.u8 d25, d7, d5 @ abs(p0 - p2) - vabd.u8 d26, d8, d10 @ abs(q0 - q2) - vabd.u8 d27, d4, d7 @ abs(p3 - p0) - vabd.u8 d28, d11, d8 @ abs(q3 - q0) - - @ only compare the largest value to thresh - vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2)) - vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0)) - vmax.u8 d25, d25, d26 - vmax.u8 d20, d20, d25 - - vshr.u8 d23, d23, #1 @ a = a / 2 - vqadd.u8 d24, d24, d23 @ a = b + a - - vmov.u8 d30, #1 - vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1 - - vcge.u8 d20, d30, d20 @ flat - - vand d19, d19, d24 @ mask - - @ hevmask - vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1 - vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1 - vorr d21, d21, d22 @ hev - - vand d16, d20, d19 @ flat && mask - vmov r5, r6, d16 - - @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) - vabd.u8 d22, d3, d7 @ abs(p4 - p0) - vabd.u8 d23, d12, d8 @ abs(q4 - q0) - vabd.u8 d24, d7, d2 @ abs(p0 - p5) - vabd.u8 d25, d8, d13 @ abs(q0 - q5) - vabd.u8 d26, d1, d7 @ abs(p6 - p0) - vabd.u8 d27, d14, d8 @ abs(q6 - q0) - vabd.u8 d28, d0, d7 @ abs(p7 - p0) - vabd.u8 d29, d15, d8 @ abs(q7 - q0) - - @ only compare the largest value to thresh - vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0)) - vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5)) - vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0)) - vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0)) - - vmax.u8 d26, d22, d23 - vmax.u8 d27, d24, d25 - vmax.u8 d23, d26, d27 - - vcge.u8 d18, d30, d23 @ flat2 - - vmov.u8 d22, #0x80 - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #1 @ Only do filter branch - - vand d17, d18, d16 @ flat2 && flat && mask - vmov r5, r6, d17 - - @ mbfilter() function - - @ filter() function - @ convert to signed - veor d23, d8, d22 @ qs0 - veor d24, d7, d22 @ ps0 - veor d25, d6, d22 @ ps1 - veor d26, d9, d22 @ qs1 - - vmov.u8 d27, #3 - - vsub.s8 d28, d23, d24 @ ( qs0 - ps0) - vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1) - vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0) - vand d29, d29, d21 @ filter &= hev - vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0) - vmov.u8 d29, #4 - - @ filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d28, q15 - - vand d28, d28, d19 @ filter &= mask - - vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3) - vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4) - vshr.s8 d30, d30, #3 @ filter2 >>= 3 - vshr.s8 d29, d29, #3 @ filter1 >>= 3 - - - vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2) - vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1) - - @ outer tap adjustments: ++filter1 >> 1 - vrshr.s8 d29, d29, #1 - vbic d29, d29, d21 @ filter &= ~hev - - vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter) - vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter) - - veor d24, d24, d22 @ *f_op0 = u^0x80 - veor d23, d23, d22 @ *f_oq0 = u^0x80 - veor d25, d25, d22 @ *f_op1 = u^0x80 - veor d26, d26, d22 @ *f_oq1 = u^0x80 - - tst r7, #1 - bxne lr - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #2 @ Only do mbfilter branch - - @ mbfilter flat && mask branch - @ TODO(fgalligan): Can I decrease the cycles shifting to consective d's - @ and using vibt on the q's? - vmov.u8 d29, #2 - vaddl.u8 q15, d7, d8 @ op2 = p0 + q0 - vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3 - vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2 - vaddl.u8 q10, d4, d5 - vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2 - vaddl.u8 q14, d6, d9 - vqrshrn.u16 d18, q15, #3 @ r_op2 - - vsub.i16 q15, q10 - vaddl.u8 q10, d4, d6 - vadd.i16 q15, q14 - vaddl.u8 q14, d7, d10 - vqrshrn.u16 d19, q15, #3 @ r_op1 - - vsub.i16 q15, q10 - vadd.i16 q15, q14 - vaddl.u8 q14, d8, d11 - vqrshrn.u16 d20, q15, #3 @ r_op0 - - vsubw.u8 q15, d4 @ oq0 = op0 - p3 - vsubw.u8 q15, d7 @ oq0 -= p0 - vadd.i16 q15, q14 - vaddl.u8 q14, d9, d11 - vqrshrn.u16 d21, q15, #3 @ r_oq0 - - vsubw.u8 q15, d5 @ oq1 = oq0 - p2 - vsubw.u8 q15, d8 @ oq1 -= q0 - vadd.i16 q15, q14 - vaddl.u8 q14, d10, d11 - vqrshrn.u16 d22, q15, #3 @ r_oq1 - - vsubw.u8 q15, d6 @ oq2 = oq0 - p1 - vsubw.u8 q15, d9 @ oq2 -= q1 - vadd.i16 q15, q14 - vqrshrn.u16 d27, q15, #3 @ r_oq2 - - @ Filter does not set op2 or oq2, so use p2 and q2. - vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask) - vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask) - vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask) - vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask) - vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask) - - vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask) - vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask) - - tst r7, #2 - bxne lr - - @ wide_mbfilter flat2 && flat && mask branch - vmov.u8 d16, #7 - vaddl.u8 q15, d7, d8 @ op6 = p0 + q0 - vaddl.u8 q12, d2, d3 - vaddl.u8 q13, d4, d5 - vaddl.u8 q14, d1, d6 - vmlal.u8 q15, d0, d16 @ op6 += p7 * 3 - vadd.i16 q12, q13 - vadd.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vadd.i16 q15, q12 - vaddl.u8 q12, d0, d1 - vaddw.u8 q15, d1 - vaddl.u8 q13, d0, d2 - vadd.i16 q14, q15, q14 - vqrshrn.u16 d16, q15, #4 @ w_op6 - - vsub.i16 q15, q14, q12 - vaddl.u8 q14, d3, d10 - vqrshrn.u16 d24, q15, #4 @ w_op5 - - vsub.i16 q15, q13 - vaddl.u8 q13, d0, d3 - vadd.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vqrshrn.u16 d25, q15, #4 @ w_op4 - - vadd.i16 q15, q14 - vaddl.u8 q14, d0, d4 - vsub.i16 q15, q13 - vsub.i16 q14, q15, q14 - vqrshrn.u16 d26, q15, #4 @ w_op3 - - vaddw.u8 q15, q14, d5 @ op2 += p2 - vaddl.u8 q14, d0, d5 - vaddw.u8 q15, d12 @ op2 += q4 - vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m) - vqrshrn.u16 d27, q15, #4 @ w_op2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d6 - vaddw.u8 q15, d6 @ op1 += p1 - vaddw.u8 q15, d13 @ op1 += q5 - vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m) - vqrshrn.u16 d18, q15, #4 @ w_op1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d7 - vaddw.u8 q15, d7 @ op0 += p0 - vaddw.u8 q15, d14 @ op0 += q6 - vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m) - vqrshrn.u16 d19, q15, #4 @ w_op0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d1, d8 - vaddw.u8 q15, d8 @ oq0 += q0 - vaddw.u8 q15, d15 @ oq0 += q7 - vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m) - vqrshrn.u16 d20, q15, #4 @ w_oq0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vaddw.u8 q15, d9 @ oq1 += q1 - vaddl.u8 q4, d10, d15 - vaddw.u8 q15, d15 @ oq1 += q7 - vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m) - vqrshrn.u16 d21, q15, #4 @ w_oq1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d3, d10 - vadd.i16 q15, q4 - vaddl.u8 q4, d11, d15 - vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m) - vqrshrn.u16 d22, q15, #4 @ w_oq2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vadd.i16 q15, q4 - vaddl.u8 q4, d12, d15 - vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m) - vqrshrn.u16 d23, q15, #4 @ w_oq3 - - vsub.i16 q15, q14 - vaddl.u8 q14, d5, d12 - vadd.i16 q15, q4 - vaddl.u8 q4, d13, d15 - vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m) - vqrshrn.u16 d1, q15, #4 @ w_oq4 - - vsub.i16 q15, q14 - vaddl.u8 q14, d6, d13 - vadd.i16 q15, q4 - vaddl.u8 q4, d14, d15 - vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m) - vqrshrn.u16 d2, q15, #4 @ w_oq5 - - vsub.i16 q15, q14 - vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m) - vadd.i16 q15, q4 - vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m) - vqrshrn.u16 d3, q15, #4 @ w_oq6 - vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m) - vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m) - vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m) - - bx lr - .size vpx_wide_mbfilter_neon, .-vpx_wide_mbfilter_neon @ ENDP @ |vpx_wide_mbfilter_neon| - - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s deleted file mode 100644 index e8852fa0d0..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s +++ /dev/null @@ -1,44 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 -@ -@ Copyright (c) 2010 The WebM project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. -@ - - - .global vpx_push_neon - .type vpx_push_neon, function - .global vpx_pop_neon - .type vpx_pop_neon, function - - .arm - .eabi_attribute 24, 1 @Tag_ABI_align_needed - .eabi_attribute 25, 1 @Tag_ABI_align_preserved - -.text -.p2align 2 - -_vpx_push_neon: - vpx_push_neon: @ PROC - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - .size vpx_push_neon, .-vpx_push_neon @ ENDP - -_vpx_pop_neon: - vpx_pop_neon: @ PROC - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - .size vpx_pop_neon, .-vpx_pop_neon @ ENDP - - - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s deleted file mode 100644 index 1c527afcff..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s +++ /dev/null @@ -1,660 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas_apple.pl script. - - .set WIDE_REFERENCE, 0 - .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 - @ - @ Copyright (c) 2014 The WebM project authors. All Rights Reserved. - @ - @ Use of this source code is governed by a BSD-style license - @ that can be found in the LICENSE file in the root of the source - @ tree. An additional intellectual property rights grant can be found - @ in the file PATENTS. All contributing project authors may - @ be found in the AUTHORS file in the root of the source tree. - @ - - .globl _vpx_v_predictor_4x4_neon - .globl vpx_v_predictor_4x4_neon - .globl _vpx_v_predictor_8x8_neon - .globl vpx_v_predictor_8x8_neon - .globl _vpx_v_predictor_16x16_neon - .globl vpx_v_predictor_16x16_neon - .globl _vpx_v_predictor_32x32_neon - .globl vpx_v_predictor_32x32_neon - .globl _vpx_h_predictor_4x4_neon - .globl vpx_h_predictor_4x4_neon - .globl _vpx_h_predictor_8x8_neon - .globl vpx_h_predictor_8x8_neon - .globl _vpx_h_predictor_16x16_neon - .globl vpx_h_predictor_16x16_neon - .globl _vpx_h_predictor_32x32_neon - .globl vpx_h_predictor_32x32_neon - .globl _vpx_tm_predictor_4x4_neon - .globl vpx_tm_predictor_4x4_neon - .globl _vpx_tm_predictor_8x8_neon - .globl vpx_tm_predictor_8x8_neon - .globl _vpx_tm_predictor_16x16_neon - .globl vpx_tm_predictor_16x16_neon - .globl _vpx_tm_predictor_32x32_neon - .globl vpx_tm_predictor_32x32_neon - @ ARM - @ - @ PRESERVE8 - -.text -.p2align 2 - - @void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_4x4_neon: - vpx_v_predictor_4x4_neon: @ - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - @ @ |vpx_v_predictor_4x4_neon| - - @void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_8x8_neon: - vpx_v_predictor_8x8_neon: @ - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - @ @ |vpx_v_predictor_8x8_neon| - - @void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_16x16_neon: - vpx_v_predictor_16x16_neon: @ - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - @ @ |vpx_v_predictor_16x16_neon| - - @void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_32x32_neon: - vpx_v_predictor_32x32_neon: @ - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v: - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - @ @ |vpx_v_predictor_32x32_neon| - - @void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_4x4_neon: - vpx_h_predictor_4x4_neon: @ - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx lr - @ @ |vpx_h_predictor_4x4_neon| - - @void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_8x8_neon: - vpx_h_predictor_8x8_neon: @ - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - @ @ |vpx_h_predictor_8x8_neon| - - @void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_16x16_neon: - vpx_h_predictor_16x16_neon: @ - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - @ @ |vpx_h_predictor_16x16_neon| - - @void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_32x32_neon: - vpx_h_predictor_32x32_neon: @ - sub r1, r1, #16 - mov r2, #2 -loop_h: - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - @ @ |vpx_h_predictor_32x32_neon| - - @void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_4x4_neon: - vpx_tm_predictor_4x4_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.u8 {d0[]}, [r12] - - @ Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3]! - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - @ 3rd row and 4th row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3] - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - @ @ |vpx_tm_predictor_4x4_neon| - - @void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_8x8_neon: - vpx_tm_predictor_8x8_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ preload 8 left - vld1.8 {d30}, [r3] - - @ Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - @ 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - @ @ |vpx_tm_predictor_8x8_neon| - - @void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_16x16_neon: - vpx_tm_predictor_16x16_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 8 pixels - vld1.8 {q1}, [r2] - - @ preload 8 left into r12 - vld1.8 {d18}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d0 - - vmovl.u8 q10, d18 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon: - @ Process two rows. - vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] @ proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - @ Process two rows. - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] @ proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] @ proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! @ preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - @ @ |vpx_tm_predictor_16x16_neon| - - @void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_32x32_neon: - vpx_tm_predictor_32x32_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - @ preload 8 left pixels - vld1.8 {d26}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d0 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d0 - - vmovl.u8 q3, d26 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon: - @ Process two rows. - vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! @ preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - @ @ |vpx_tm_predictor_32x32_neon| - diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s deleted file mode 100644 index 69f7e5207e..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s +++ /dev/null @@ -1,649 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas_apple.pl script. - - .set WIDE_REFERENCE, 0 - .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 - @ - @ Copyright (c) 2013 The WebM project authors. All Rights Reserved. - @ - @ Use of this source code is governed by a BSD-style license - @ that can be found in the LICENSE file in the root of the source - @ tree. An additional intellectual property rights grant can be found - @ in the file PATENTS. All contributing project authors may - @ be found in the AUTHORS file in the root of the source tree. - @ - - .globl _vpx_lpf_horizontal_edge_8_neon - .globl vpx_lpf_horizontal_edge_8_neon - .globl _vpx_lpf_horizontal_edge_16_neon - .globl vpx_lpf_horizontal_edge_16_neon - .globl _vpx_lpf_vertical_16_neon - .globl vpx_lpf_vertical_16_neon - @ ARM - -.text -.p2align 2 - - @ void mb_lpf_horizontal_edge(uint8_t *s, int p, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh, - @ int count) - @ r0 uint8_t *s, - @ r1 int p, /* pitch */ - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh, - @ r12 int count -_mb_lpf_horizontal_edge: - mb_lpf_horizontal_edge: @ - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - -h_count: - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, r1, lsl #3 @ move src pointer down by 8 lines - - vld1.u8 {d0}, [r8,:64], r1 @ p7 - vld1.u8 {d1}, [r8,:64], r1 @ p6 - vld1.u8 {d2}, [r8,:64], r1 @ p5 - vld1.u8 {d3}, [r8,:64], r1 @ p4 - vld1.u8 {d4}, [r8,:64], r1 @ p3 - vld1.u8 {d5}, [r8,:64], r1 @ p2 - vld1.u8 {d6}, [r8,:64], r1 @ p1 - vld1.u8 {d7}, [r8,:64], r1 @ p0 - vld1.u8 {d8}, [r8,:64], r1 @ q0 - vld1.u8 {d9}, [r8,:64], r1 @ q1 - vld1.u8 {d10}, [r8,:64], r1 @ q2 - vld1.u8 {d11}, [r8,:64], r1 @ q3 - vld1.u8 {d12}, [r8,:64], r1 @ q4 - vld1.u8 {d13}, [r8,:64], r1 @ q5 - vld1.u8 {d14}, [r8,:64], r1 @ q6 - vld1.u8 {d15}, [r8,:64], r1 @ q7 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq h_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. - sub r8, r0, r1, lsl #1 - - vst1.u8 {d25}, [r8,:64], r1 @ store op1 - vst1.u8 {d24}, [r8,:64], r1 @ store op0 - vst1.u8 {d23}, [r8,:64], r1 @ store oq0 - vst1.u8 {d26}, [r8,:64], r1 @ store oq1 - - b h_next - -h_mbfilter: - tst r7, #2 - beq h_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, r1, lsl #1 - sub r8, r8, r1 - - vst1.u8 {d18}, [r8,:64], r1 @ store op2 - vst1.u8 {d19}, [r8,:64], r1 @ store op1 - vst1.u8 {d20}, [r8,:64], r1 @ store op0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq0 - vst1.u8 {d22}, [r8,:64], r1 @ store oq1 - vst1.u8 {d23}, [r8,:64], r1 @ store oq2 - - b h_next - -h_wide_mbfilter: - sub r8, r0, r1, lsl #3 - add r8, r8, r1 - - vst1.u8 {d16}, [r8,:64], r1 @ store op6 - vst1.u8 {d24}, [r8,:64], r1 @ store op5 - vst1.u8 {d25}, [r8,:64], r1 @ store op4 - vst1.u8 {d26}, [r8,:64], r1 @ store op3 - vst1.u8 {d27}, [r8,:64], r1 @ store op2 - vst1.u8 {d18}, [r8,:64], r1 @ store op1 - vst1.u8 {d19}, [r8,:64], r1 @ store op0 - vst1.u8 {d20}, [r8,:64], r1 @ store oq0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq1 - vst1.u8 {d22}, [r8,:64], r1 @ store oq2 - vst1.u8 {d23}, [r8,:64], r1 @ store oq3 - vst1.u8 {d1}, [r8,:64], r1 @ store oq4 - vst1.u8 {d2}, [r8,:64], r1 @ store oq5 - vst1.u8 {d3}, [r8,:64], r1 @ store oq6 - -h_next: - add r0, r0, #8 - subs r12, r12, #1 - bne h_count - - vpop {d8-d15} - pop {r4-r8, pc} - - @ @ |mb_lpf_horizontal_edge| - - @ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh) - @ r0 uint8_t *s, - @ r1 int pitch, - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_8_neon: - vpx_lpf_horizontal_edge_8_neon: @ - mov r12, #1 - b mb_lpf_horizontal_edge - @ @ |vpx_lpf_horizontal_edge_8_neon| - - @ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh) - @ r0 uint8_t *s, - @ r1 int pitch, - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_16_neon: - vpx_lpf_horizontal_edge_16_neon: @ - mov r12, #2 - b mb_lpf_horizontal_edge - @ @ |vpx_lpf_horizontal_edge_16_neon| - - @ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh) - @ r0 uint8_t *s, - @ r1 int p, /* pitch */ - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh, -_vpx_lpf_vertical_16_neon: - vpx_lpf_vertical_16_neon: @ - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, #8 - - vld1.8 {d0}, [r8,:64], r1 - vld1.8 {d8}, [r0,:64], r1 - vld1.8 {d1}, [r8,:64], r1 - vld1.8 {d9}, [r0,:64], r1 - vld1.8 {d2}, [r8,:64], r1 - vld1.8 {d10}, [r0,:64], r1 - vld1.8 {d3}, [r8,:64], r1 - vld1.8 {d11}, [r0,:64], r1 - vld1.8 {d4}, [r8,:64], r1 - vld1.8 {d12}, [r0,:64], r1 - vld1.8 {d5}, [r8,:64], r1 - vld1.8 {d13}, [r0,:64], r1 - vld1.8 {d6}, [r8,:64], r1 - vld1.8 {d14}, [r0,:64], r1 - vld1.8 {d7}, [r8,:64], r1 - vld1.8 {d15}, [r0,:64], r1 - - sub r0, r0, r1, lsl #3 - - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - vtrn.8 d4, d5 - vtrn.8 d6, d7 - - vtrn.8 d8, d9 - vtrn.8 d10, d11 - vtrn.8 d12, d13 - vtrn.8 d14, d15 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq v_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. - sub r8, r0, #2 - - vswp d23, d25 - - vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 - vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 - vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 - vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 - vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 - vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 - vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 - vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 - - b v_end - -v_mbfilter: - tst r7, #2 - beq v_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, #3 - - vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 - vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 - vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 - vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 - vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 - vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 - vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 - vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 - vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 - vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 - vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 - vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 - vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 - vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 - vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 - vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 - - b v_end - -v_wide_mbfilter: - sub r8, r0, #8 - - vtrn.32 d0, d26 - vtrn.32 d16, d27 - vtrn.32 d24, d18 - vtrn.32 d25, d19 - - vtrn.16 d0, d24 - vtrn.16 d16, d25 - vtrn.16 d26, d18 - vtrn.16 d27, d19 - - vtrn.8 d0, d16 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - vtrn.8 d18, d19 - - vtrn.32 d20, d1 - vtrn.32 d21, d2 - vtrn.32 d22, d3 - vtrn.32 d23, d15 - - vtrn.16 d20, d22 - vtrn.16 d21, d23 - vtrn.16 d1, d3 - vtrn.16 d2, d15 - - vtrn.8 d20, d21 - vtrn.8 d22, d23 - vtrn.8 d1, d2 - vtrn.8 d3, d15 - - vst1.8 {d0}, [r8,:64], r1 - vst1.8 {d20}, [r0,:64], r1 - vst1.8 {d16}, [r8,:64], r1 - vst1.8 {d21}, [r0,:64], r1 - vst1.8 {d24}, [r8,:64], r1 - vst1.8 {d22}, [r0,:64], r1 - vst1.8 {d25}, [r8,:64], r1 - vst1.8 {d23}, [r0,:64], r1 - vst1.8 {d26}, [r8,:64], r1 - vst1.8 {d1}, [r0,:64], r1 - vst1.8 {d27}, [r8,:64], r1 - vst1.8 {d2}, [r0,:64], r1 - vst1.8 {d18}, [r8,:64], r1 - vst1.8 {d3}, [r0,:64], r1 - vst1.8 {d19}, [r8,:64], r1 - vst1.8 {d15}, [r0,:64], r1 - -v_end: - vpop {d8-d15} - pop {r4-r8, pc} - - @ @ |vpx_lpf_vertical_16_neon| - - @ void vpx_wide_mbfilter_neon() @ - @ This is a helper function for the loopfilters. The invidual functions do the - @ necessary load, transpose (if necessary) and store. - @ - @ r0-r3 PRESERVE - @ d16 blimit - @ d17 limit - @ d18 thresh - @ d0 p7 - @ d1 p6 - @ d2 p5 - @ d3 p4 - @ d4 p3 - @ d5 p2 - @ d6 p1 - @ d7 p0 - @ d8 q0 - @ d9 q1 - @ d10 q2 - @ d11 q3 - @ d12 q4 - @ d13 q5 - @ d14 q6 - @ d15 q7 -_vpx_wide_mbfilter_neon: - vpx_wide_mbfilter_neon: @ - mov r7, #0 - - @ filter_mask - vabd.u8 d19, d4, d5 @ abs(p3 - p2) - vabd.u8 d20, d5, d6 @ abs(p2 - p1) - vabd.u8 d21, d6, d7 @ abs(p1 - p0) - vabd.u8 d22, d9, d8 @ abs(q1 - q0) - vabd.u8 d23, d10, d9 @ abs(q2 - q1) - vabd.u8 d24, d11, d10 @ abs(q3 - q2) - - @ only compare the largest value to limit - vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1)) - vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0)) - vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2)) - vmax.u8 d19, d19, d20 - - vabd.u8 d24, d7, d8 @ abs(p0 - q0) - - vmax.u8 d19, d19, d23 - - vabd.u8 d23, d6, d9 @ a = abs(p1 - q1) - vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2 - - @ abs () > limit - vcge.u8 d19, d17, d19 - - @ flatmask4 - vabd.u8 d25, d7, d5 @ abs(p0 - p2) - vabd.u8 d26, d8, d10 @ abs(q0 - q2) - vabd.u8 d27, d4, d7 @ abs(p3 - p0) - vabd.u8 d28, d11, d8 @ abs(q3 - q0) - - @ only compare the largest value to thresh - vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2)) - vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0)) - vmax.u8 d25, d25, d26 - vmax.u8 d20, d20, d25 - - vshr.u8 d23, d23, #1 @ a = a / 2 - vqadd.u8 d24, d24, d23 @ a = b + a - - vmov.u8 d30, #1 - vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1 - - vcge.u8 d20, d30, d20 @ flat - - vand d19, d19, d24 @ mask - - @ hevmask - vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1 - vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1 - vorr d21, d21, d22 @ hev - - vand d16, d20, d19 @ flat && mask - vmov r5, r6, d16 - - @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) - vabd.u8 d22, d3, d7 @ abs(p4 - p0) - vabd.u8 d23, d12, d8 @ abs(q4 - q0) - vabd.u8 d24, d7, d2 @ abs(p0 - p5) - vabd.u8 d25, d8, d13 @ abs(q0 - q5) - vabd.u8 d26, d1, d7 @ abs(p6 - p0) - vabd.u8 d27, d14, d8 @ abs(q6 - q0) - vabd.u8 d28, d0, d7 @ abs(p7 - p0) - vabd.u8 d29, d15, d8 @ abs(q7 - q0) - - @ only compare the largest value to thresh - vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0)) - vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5)) - vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0)) - vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0)) - - vmax.u8 d26, d22, d23 - vmax.u8 d27, d24, d25 - vmax.u8 d23, d26, d27 - - vcge.u8 d18, d30, d23 @ flat2 - - vmov.u8 d22, #0x80 - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #1 @ Only do filter branch - - vand d17, d18, d16 @ flat2 && flat && mask - vmov r5, r6, d17 - - @ mbfilter() function - - @ filter() function - @ convert to signed - veor d23, d8, d22 @ qs0 - veor d24, d7, d22 @ ps0 - veor d25, d6, d22 @ ps1 - veor d26, d9, d22 @ qs1 - - vmov.u8 d27, #3 - - vsub.s8 d28, d23, d24 @ ( qs0 - ps0) - vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1) - vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0) - vand d29, d29, d21 @ filter &= hev - vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0) - vmov.u8 d29, #4 - - @ filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d28, q15 - - vand d28, d28, d19 @ filter &= mask - - vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3) - vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4) - vshr.s8 d30, d30, #3 @ filter2 >>= 3 - vshr.s8 d29, d29, #3 @ filter1 >>= 3 - - - vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2) - vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1) - - @ outer tap adjustments: ++filter1 >> 1 - vrshr.s8 d29, d29, #1 - vbic d29, d29, d21 @ filter &= ~hev - - vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter) - vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter) - - veor d24, d24, d22 @ *f_op0 = u^0x80 - veor d23, d23, d22 @ *f_oq0 = u^0x80 - veor d25, d25, d22 @ *f_op1 = u^0x80 - veor d26, d26, d22 @ *f_oq1 = u^0x80 - - tst r7, #1 - bxne lr - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #2 @ Only do mbfilter branch - - @ mbfilter flat && mask branch - @ TODO(fgalligan): Can I decrease the cycles shifting to consective d's - @ and using vibt on the q's? - vmov.u8 d29, #2 - vaddl.u8 q15, d7, d8 @ op2 = p0 + q0 - vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3 - vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2 - vaddl.u8 q10, d4, d5 - vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2 - vaddl.u8 q14, d6, d9 - vqrshrn.u16 d18, q15, #3 @ r_op2 - - vsub.i16 q15, q10 - vaddl.u8 q10, d4, d6 - vadd.i16 q15, q14 - vaddl.u8 q14, d7, d10 - vqrshrn.u16 d19, q15, #3 @ r_op1 - - vsub.i16 q15, q10 - vadd.i16 q15, q14 - vaddl.u8 q14, d8, d11 - vqrshrn.u16 d20, q15, #3 @ r_op0 - - vsubw.u8 q15, d4 @ oq0 = op0 - p3 - vsubw.u8 q15, d7 @ oq0 -= p0 - vadd.i16 q15, q14 - vaddl.u8 q14, d9, d11 - vqrshrn.u16 d21, q15, #3 @ r_oq0 - - vsubw.u8 q15, d5 @ oq1 = oq0 - p2 - vsubw.u8 q15, d8 @ oq1 -= q0 - vadd.i16 q15, q14 - vaddl.u8 q14, d10, d11 - vqrshrn.u16 d22, q15, #3 @ r_oq1 - - vsubw.u8 q15, d6 @ oq2 = oq0 - p1 - vsubw.u8 q15, d9 @ oq2 -= q1 - vadd.i16 q15, q14 - vqrshrn.u16 d27, q15, #3 @ r_oq2 - - @ Filter does not set op2 or oq2, so use p2 and q2. - vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask) - vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask) - vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask) - vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask) - vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask) - - vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask) - vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask) - - tst r7, #2 - bxne lr - - @ wide_mbfilter flat2 && flat && mask branch - vmov.u8 d16, #7 - vaddl.u8 q15, d7, d8 @ op6 = p0 + q0 - vaddl.u8 q12, d2, d3 - vaddl.u8 q13, d4, d5 - vaddl.u8 q14, d1, d6 - vmlal.u8 q15, d0, d16 @ op6 += p7 * 3 - vadd.i16 q12, q13 - vadd.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vadd.i16 q15, q12 - vaddl.u8 q12, d0, d1 - vaddw.u8 q15, d1 - vaddl.u8 q13, d0, d2 - vadd.i16 q14, q15, q14 - vqrshrn.u16 d16, q15, #4 @ w_op6 - - vsub.i16 q15, q14, q12 - vaddl.u8 q14, d3, d10 - vqrshrn.u16 d24, q15, #4 @ w_op5 - - vsub.i16 q15, q13 - vaddl.u8 q13, d0, d3 - vadd.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vqrshrn.u16 d25, q15, #4 @ w_op4 - - vadd.i16 q15, q14 - vaddl.u8 q14, d0, d4 - vsub.i16 q15, q13 - vsub.i16 q14, q15, q14 - vqrshrn.u16 d26, q15, #4 @ w_op3 - - vaddw.u8 q15, q14, d5 @ op2 += p2 - vaddl.u8 q14, d0, d5 - vaddw.u8 q15, d12 @ op2 += q4 - vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m) - vqrshrn.u16 d27, q15, #4 @ w_op2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d6 - vaddw.u8 q15, d6 @ op1 += p1 - vaddw.u8 q15, d13 @ op1 += q5 - vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m) - vqrshrn.u16 d18, q15, #4 @ w_op1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d7 - vaddw.u8 q15, d7 @ op0 += p0 - vaddw.u8 q15, d14 @ op0 += q6 - vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m) - vqrshrn.u16 d19, q15, #4 @ w_op0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d1, d8 - vaddw.u8 q15, d8 @ oq0 += q0 - vaddw.u8 q15, d15 @ oq0 += q7 - vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m) - vqrshrn.u16 d20, q15, #4 @ w_oq0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vaddw.u8 q15, d9 @ oq1 += q1 - vaddl.u8 q4, d10, d15 - vaddw.u8 q15, d15 @ oq1 += q7 - vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m) - vqrshrn.u16 d21, q15, #4 @ w_oq1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d3, d10 - vadd.i16 q15, q4 - vaddl.u8 q4, d11, d15 - vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m) - vqrshrn.u16 d22, q15, #4 @ w_oq2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vadd.i16 q15, q4 - vaddl.u8 q4, d12, d15 - vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m) - vqrshrn.u16 d23, q15, #4 @ w_oq3 - - vsub.i16 q15, q14 - vaddl.u8 q14, d5, d12 - vadd.i16 q15, q4 - vaddl.u8 q4, d13, d15 - vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m) - vqrshrn.u16 d1, q15, #4 @ w_oq4 - - vsub.i16 q15, q14 - vaddl.u8 q14, d6, d13 - vadd.i16 q15, q4 - vaddl.u8 q4, d14, d15 - vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m) - vqrshrn.u16 d2, q15, #4 @ w_oq5 - - vsub.i16 q15, q14 - vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m) - vadd.i16 q15, q4 - vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m) - vqrshrn.u16 d3, q15, #4 @ w_oq6 - vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m) - vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m) - vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m) - - bx lr - @ @ |vpx_wide_mbfilter_neon| - diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s deleted file mode 100644 index f322b698b4..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s +++ /dev/null @@ -1,46 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas_apple.pl script. - - .set WIDE_REFERENCE, 0 - .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 - @ - @ Copyright (c) 2010 The WebM project authors. All Rights Reserved. - @ - @ Use of this source code is governed by a BSD-style license - @ that can be found in the LICENSE file in the root of the source - @ tree. An additional intellectual property rights grant can be found - @ in the file PATENTS. All contributing project authors may - @ be found in the AUTHORS file in the root of the source tree. - @ - - - .globl _vpx_push_neon - .globl vpx_push_neon - .globl _vpx_pop_neon - .globl vpx_pop_neon - - @ ARM - @ - @ PRESERVE8 - -.text -.p2align 2 - -_vpx_push_neon: - vpx_push_neon: @ - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - @ - -_vpx_pop_neon: - vpx_pop_neon: @ - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - @ - - diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c deleted file mode 100644 index f734e48027..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vpx_idct16x16_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d2u8, d3u8, d30u8, d31u8; - uint64x1_t d2u64, d3u64, d4u64, d5u64; - uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q0s16; - uint8_t *d1, *d2; - int16_t i, j, a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 6); - - q0s16 = vdupq_n_s16(a1); - q0u16 = vreinterpretq_u16_s16(q0s16); - - for (d1 = d2 = dest, i = 0; i < 4; i++) { - for (j = 0; j < 2; j++) { - d2u64 = vld1_u64((const uint64_t *)d1); - d3u64 = vld1_u64((const uint64_t *)(d1 + 8)); - d1 += dest_stride; - d4u64 = vld1_u64((const uint64_t *)d1); - d5u64 = vld1_u64((const uint64_t *)(d1 + 8)); - d1 += dest_stride; - - q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); - q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); - q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64)); - q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); - vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d31u8)); - d2 += dest_stride; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c deleted file mode 100644 index 651ebb21f9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c +++ /dev/null @@ -1,1317 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" - -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; -} - -void vpx_idct16x16_256_add_neon_pass1( - int16_t *in, - int16_t *out, - int output_stride) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - uint64x1_t d16u64, d17u64, d18u64, d19u64, d20u64, d21u64, d22u64, d23u64; - uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q5s32, q6s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - int16x8x2_t q0x2s16; - - q0x2s16 = vld2q_s16(in); - q8s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q9s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q10s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q11s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q12s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q13s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q14s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - d30s16 = vget_low_s16(q15s16); - d31s16 = vget_high_s16(q15s16); - - // stage 3 - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d18s16, d1s16); - q6s32 = vmull_s16(d19s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlal_s16(q5s32, d30s16, d0s16); - q6s32 = vmlal_s16(q6s32, d31s16, d0s16); - - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q5s32, 14); - d15s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - q2s32 = vmull_s16(d26s16, d2s16); - q3s32 = vmull_s16(d27s16, d2s16); - q9s32 = vmull_s16(d26s16, d3s16); - q15s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlsl_s16(q2s32, d22s16, d3s16); - q3s32 = vmlsl_s16(q3s32, d23s16, d3s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q15s32 = vmlal_s16(q15s32, d23s16, d2s16); - - d10s16 = vqrshrn_n_s32(q2s32, 14); - d11s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q15s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 4 - d30s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d30s16); - q11s32 = vmull_s16(d17s16, d30s16); - q0s32 = vmull_s16(d24s16, d30s16); - q1s32 = vmull_s16(d25s16, d30s16); - - d30s16 = vdup_n_s16(cospi_24_64); - d31s16 = vdup_n_s16(cospi_8_64); - - q3s32 = vaddq_s32(q2s32, q0s32); - q12s32 = vaddq_s32(q11s32, q1s32); - q13s32 = vsubq_s32(q2s32, q0s32); - q1s32 = vsubq_s32(q11s32, q1s32); - - d16s16 = vqrshrn_n_s32(q3s32, 14); - d17s16 = vqrshrn_n_s32(q12s32, 14); - d18s16 = vqrshrn_n_s32(q13s32, 14); - d19s16 = vqrshrn_n_s32(q1s32, 14); - q8s16 = vcombine_s16(d16s16, d17s16); - q9s16 = vcombine_s16(d18s16, d19s16); - - q0s32 = vmull_s16(d20s16, d31s16); - q1s32 = vmull_s16(d21s16, d31s16); - q12s32 = vmull_s16(d20s16, d30s16); - q13s32 = vmull_s16(d21s16, d30s16); - - q0s32 = vmlal_s16(q0s32, d28s16, d30s16); - q1s32 = vmlal_s16(q1s32, d29s16, d30s16); - q12s32 = vmlsl_s16(q12s32, d28s16, d31s16); - q13s32 = vmlsl_s16(q13s32, d29s16, d31s16); - - d22s16 = vqrshrn_n_s32(q0s32, 14); - d23s16 = vqrshrn_n_s32(q1s32, 14); - d20s16 = vqrshrn_n_s32(q12s32, 14); - d21s16 = vqrshrn_n_s32(q13s32, 14); - q10s16 = vcombine_s16(d20s16, d21s16); - q11s16 = vcombine_s16(d22s16, d23s16); - - q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - q14s16 = vsubq_s16(q7s16, q6s16); - q15s16 = vaddq_s16(q6s16, q7s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - // stage 5 - q0s16 = vaddq_s16(q8s16, q11s16); - q1s16 = vaddq_s16(q9s16, q10s16); - q2s16 = vsubq_s16(q9s16, q10s16); - q3s16 = vsubq_s16(q8s16, q11s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q11s32 = vmull_s16(d26s16, d16s16); - q12s32 = vmull_s16(d27s16, d16s16); - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - - q6s32 = vsubq_s32(q9s32, q11s32); - q13s32 = vsubq_s32(q10s32, q12s32); - q9s32 = vaddq_s32(q9s32, q11s32); - q10s32 = vaddq_s32(q10s32, q12s32); - - d10s16 = vqrshrn_n_s32(q6s32, 14); - d11s16 = vqrshrn_n_s32(q13s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q10s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 6 - q8s16 = vaddq_s16(q0s16, q15s16); - q9s16 = vaddq_s16(q1s16, q6s16); - q10s16 = vaddq_s16(q2s16, q5s16); - q11s16 = vaddq_s16(q3s16, q4s16); - q12s16 = vsubq_s16(q3s16, q4s16); - q13s16 = vsubq_s16(q2s16, q5s16); - q14s16 = vsubq_s16(q1s16, q6s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - d16u64 = vreinterpret_u64_s16(vget_low_s16(q8s16)); - d17u64 = vreinterpret_u64_s16(vget_high_s16(q8s16)); - d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); - d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); - d20u64 = vreinterpret_u64_s16(vget_low_s16(q10s16)); - d21u64 = vreinterpret_u64_s16(vget_high_s16(q10s16)); - d22u64 = vreinterpret_u64_s16(vget_low_s16(q11s16)); - d23u64 = vreinterpret_u64_s16(vget_high_s16(q11s16)); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); - d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); - d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); - d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); - - // store the data - output_stride >>= 1; // output_stride / 2, out is int16_t - vst1_u64((uint64_t *)out, d16u64); - out += output_stride; - vst1_u64((uint64_t *)out, d17u64); - out += output_stride; - vst1_u64((uint64_t *)out, d18u64); - out += output_stride; - vst1_u64((uint64_t *)out, d19u64); - out += output_stride; - vst1_u64((uint64_t *)out, d20u64); - out += output_stride; - vst1_u64((uint64_t *)out, d21u64); - out += output_stride; - vst1_u64((uint64_t *)out, d22u64); - out += output_stride; - vst1_u64((uint64_t *)out, d23u64); - out += output_stride; - vst1_u64((uint64_t *)out, d24u64); - out += output_stride; - vst1_u64((uint64_t *)out, d25u64); - out += output_stride; - vst1_u64((uint64_t *)out, d26u64); - out += output_stride; - vst1_u64((uint64_t *)out, d27u64); - out += output_stride; - vst1_u64((uint64_t *)out, d28u64); - out += output_stride; - vst1_u64((uint64_t *)out, d29u64); - out += output_stride; - vst1_u64((uint64_t *)out, d30u64); - out += output_stride; - vst1_u64((uint64_t *)out, d31u64); - return; -} - -void vpx_idct16x16_256_add_neon_pass2( - int16_t *src, - int16_t *out, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride) { - uint8_t *d; - uint8x8_t d12u8, d13u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - uint64x1_t d24u64, d25u64, d26u64, d27u64; - int64x1_t d12s64, d13s64; - uint16x8_t q2u16, q3u16, q4u16, q5u16, q8u16; - uint16x8_t q9u16, q12u16, q13u16, q14u16, q15u16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32; - int16x8x2_t q0x2s16; - - q0x2s16 = vld2q_s16(src); - q8s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q9s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q10s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q11s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q12s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q13s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q14s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - d30s16 = vget_low_s16(q15s16); - d31s16 = vget_high_s16(q15s16); - - // stage 3 - d12s16 = vdup_n_s16(cospi_30_64); - d13s16 = vdup_n_s16(cospi_2_64); - - q2s32 = vmull_s16(d16s16, d12s16); - q3s32 = vmull_s16(d17s16, d12s16); - q1s32 = vmull_s16(d16s16, d13s16); - q4s32 = vmull_s16(d17s16, d13s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d13s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d13s16); - q1s32 = vmlal_s16(q1s32, d30s16, d12s16); - q4s32 = vmlal_s16(q4s32, d31s16, d12s16); - - d0s16 = vqrshrn_n_s32(q2s32, 14); - d1s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q1s32, 14); - d15s16 = vqrshrn_n_s32(q4s32, 14); - q0s16 = vcombine_s16(d0s16, d1s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d30s16 = vdup_n_s16(cospi_14_64); - d31s16 = vdup_n_s16(cospi_18_64); - - q2s32 = vmull_s16(d24s16, d30s16); - q3s32 = vmull_s16(d25s16, d30s16); - q4s32 = vmull_s16(d24s16, d31s16); - q5s32 = vmull_s16(d25s16, d31s16); - - q2s32 = vmlsl_s16(q2s32, d22s16, d31s16); - q3s32 = vmlsl_s16(q3s32, d23s16, d31s16); - q4s32 = vmlal_s16(q4s32, d22s16, d30s16); - q5s32 = vmlal_s16(q5s32, d23s16, d30s16); - - d2s16 = vqrshrn_n_s32(q2s32, 14); - d3s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q4s32, 14); - d13s16 = vqrshrn_n_s32(q5s32, 14); - q1s16 = vcombine_s16(d2s16, d3s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - d30s16 = vdup_n_s16(cospi_22_64); - d31s16 = vdup_n_s16(cospi_10_64); - - q11s32 = vmull_s16(d20s16, d30s16); - q12s32 = vmull_s16(d21s16, d30s16); - q4s32 = vmull_s16(d20s16, d31s16); - q5s32 = vmull_s16(d21s16, d31s16); - - q11s32 = vmlsl_s16(q11s32, d26s16, d31s16); - q12s32 = vmlsl_s16(q12s32, d27s16, d31s16); - q4s32 = vmlal_s16(q4s32, d26s16, d30s16); - q5s32 = vmlal_s16(q5s32, d27s16, d30s16); - - d4s16 = vqrshrn_n_s32(q11s32, 14); - d5s16 = vqrshrn_n_s32(q12s32, 14); - d11s16 = vqrshrn_n_s32(q5s32, 14); - d10s16 = vqrshrn_n_s32(q4s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - d30s16 = vdup_n_s16(cospi_6_64); - d31s16 = vdup_n_s16(cospi_26_64); - - q10s32 = vmull_s16(d28s16, d30s16); - q11s32 = vmull_s16(d29s16, d30s16); - q12s32 = vmull_s16(d28s16, d31s16); - q13s32 = vmull_s16(d29s16, d31s16); - - q10s32 = vmlsl_s16(q10s32, d18s16, d31s16); - q11s32 = vmlsl_s16(q11s32, d19s16, d31s16); - q12s32 = vmlal_s16(q12s32, d18s16, d30s16); - q13s32 = vmlal_s16(q13s32, d19s16, d30s16); - - d6s16 = vqrshrn_n_s32(q10s32, 14); - d7s16 = vqrshrn_n_s32(q11s32, 14); - d8s16 = vqrshrn_n_s32(q12s32, 14); - d9s16 = vqrshrn_n_s32(q13s32, 14); - q3s16 = vcombine_s16(d6s16, d7s16); - q4s16 = vcombine_s16(d8s16, d9s16); - - // stage 3 - q9s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q10s16 = vsubq_s16(q3s16, q2s16); - q11s16 = vaddq_s16(q2s16, q3s16); - q12s16 = vaddq_s16(q4s16, q5s16); - q13s16 = vsubq_s16(q4s16, q5s16); - q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q6s16, q7s16); - - // stage 4 - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - q2s32 = vmull_s16(d18s16, d31s16); - q3s32 = vmull_s16(d19s16, d31s16); - q4s32 = vmull_s16(d28s16, d31s16); - q5s32 = vmull_s16(d29s16, d31s16); - - q2s32 = vmlal_s16(q2s32, d28s16, d30s16); - q3s32 = vmlal_s16(q3s32, d29s16, d30s16); - q4s32 = vmlsl_s16(q4s32, d18s16, d30s16); - q5s32 = vmlsl_s16(q5s32, d19s16, d30s16); - - d12s16 = vqrshrn_n_s32(q2s32, 14); - d13s16 = vqrshrn_n_s32(q3s32, 14); - d2s16 = vqrshrn_n_s32(q4s32, 14); - d3s16 = vqrshrn_n_s32(q5s32, 14); - q1s16 = vcombine_s16(d2s16, d3s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - q3s16 = q11s16; - q4s16 = q12s16; - - d30s16 = vdup_n_s16(-cospi_8_64); - q11s32 = vmull_s16(d26s16, d30s16); - q12s32 = vmull_s16(d27s16, d30s16); - q8s32 = vmull_s16(d20s16, d30s16); - q9s32 = vmull_s16(d21s16, d30s16); - - q11s32 = vmlsl_s16(q11s32, d20s16, d31s16); - q12s32 = vmlsl_s16(q12s32, d21s16, d31s16); - q8s32 = vmlal_s16(q8s32, d26s16, d31s16); - q9s32 = vmlal_s16(q9s32, d27s16, d31s16); - - d4s16 = vqrshrn_n_s32(q11s32, 14); - d5s16 = vqrshrn_n_s32(q12s32, 14); - d10s16 = vqrshrn_n_s32(q8s32, 14); - d11s16 = vqrshrn_n_s32(q9s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - // stage 5 - q8s16 = vaddq_s16(q0s16, q3s16); - q9s16 = vaddq_s16(q1s16, q2s16); - q10s16 = vsubq_s16(q1s16, q2s16); - q11s16 = vsubq_s16(q0s16, q3s16); - q12s16 = vsubq_s16(q7s16, q4s16); - q13s16 = vsubq_s16(q6s16, q5s16); - q14s16 = vaddq_s16(q6s16, q5s16); - q15s16 = vaddq_s16(q7s16, q4s16); - - // stage 6 - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - - d14s16 = vdup_n_s16(cospi_16_64); - - q3s32 = vmull_s16(d26s16, d14s16); - q4s32 = vmull_s16(d27s16, d14s16); - q0s32 = vmull_s16(d20s16, d14s16); - q1s32 = vmull_s16(d21s16, d14s16); - - q5s32 = vsubq_s32(q3s32, q0s32); - q6s32 = vsubq_s32(q4s32, q1s32); - q10s32 = vaddq_s32(q3s32, q0s32); - q4s32 = vaddq_s32(q4s32, q1s32); - - d4s16 = vqrshrn_n_s32(q5s32, 14); - d5s16 = vqrshrn_n_s32(q6s32, 14); - d10s16 = vqrshrn_n_s32(q10s32, 14); - d11s16 = vqrshrn_n_s32(q4s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q0s32 = vmull_s16(d22s16, d14s16); - q1s32 = vmull_s16(d23s16, d14s16); - q13s32 = vmull_s16(d24s16, d14s16); - q6s32 = vmull_s16(d25s16, d14s16); - - q10s32 = vsubq_s32(q13s32, q0s32); - q4s32 = vsubq_s32(q6s32, q1s32); - q13s32 = vaddq_s32(q13s32, q0s32); - q6s32 = vaddq_s32(q6s32, q1s32); - - d6s16 = vqrshrn_n_s32(q10s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - d8s16 = vqrshrn_n_s32(q13s32, 14); - d9s16 = vqrshrn_n_s32(q6s32, 14); - q3s16 = vcombine_s16(d6s16, d7s16); - q4s16 = vcombine_s16(d8s16, d9s16); - - // stage 7 - if (skip_adding != 0) { - d = dest; - // load the data in pass1 - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - - q12s16 = vaddq_s16(q0s16, q15s16); - q13s16 = vaddq_s16(q1s16, q14s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q14s16 = vsubq_s16(q1s16, q14s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q12s16 = vaddq_s16(q10s16, q5s16); - q13s16 = vaddq_s16(q11s16, q4s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q4s16 = vsubq_s16(q11s16, q4s16); - q5s16 = vsubq_s16(q10s16, q5s16); - - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q12s16 = vaddq_s16(q0s16, q3s16); - q13s16 = vaddq_s16(q1s16, q2s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q2s16 = vsubq_s16(q1s16, q2s16); - q3s16 = vsubq_s16(q0s16, q3s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q12s16 = vaddq_s16(q10s16, q9s16); - q13s16 = vaddq_s16(q11s16, q8s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q8s16 = vsubq_s16(q11s16, q8s16); - q9s16 = vsubq_s16(q10s16, q9s16); - - // store the data out 8,9,10,11,12,13,14,15 - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q8s16 = vrshrq_n_s16(q8s16, 6); - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q9s16 = vrshrq_n_s16(q9s16, 6); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q2s16 = vrshrq_n_s16(q2s16, 6); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q3s16 = vrshrq_n_s16(q3s16, 6); - q3u16 = vaddw_u8(vreinterpretq_u16_s16(q3s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q3u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q4s16 = vrshrq_n_s16(q4s16, 6); - q4u16 = vaddw_u8(vreinterpretq_u16_s16(q4s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q4u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q5s16 = vrshrq_n_s16(q5s16, 6); - q5u16 = vaddw_u8(vreinterpretq_u16_s16(q5s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q5u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q14s16 = vrshrq_n_s16(q14s16, 6); - q14u16 = vaddw_u8(vreinterpretq_u16_s16(q14s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q14u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - q15s16 = vrshrq_n_s16(q15s16, 6); - q15u16 = vaddw_u8(vreinterpretq_u16_s16(q15s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q15u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - } else { // skip_adding_dest - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q15s16); - q13s16 = vaddq_s16(q1s16, q14s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q14s16 = vsubq_s16(q1s16, q14s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q10s16, q5s16); - q13s16 = vaddq_s16(q11s16, q4s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q4s16 = vsubq_s16(q11s16, q4s16); - q5s16 = vsubq_s16(q10s16, q5s16); - - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q3s16); - q13s16 = vaddq_s16(q1s16, q2s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q2s16 = vsubq_s16(q1s16, q2s16); - q3s16 = vsubq_s16(q0s16, q3s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q10s16, q9s16); - q13s16 = vaddq_s16(q11s16, q8s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q8s16 = vsubq_s16(q11s16, q8s16); - q9s16 = vsubq_s16(q10s16, q9s16); - - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q8s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q8s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q9s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q9s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q2s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q2s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q3s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q3s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q4s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q4s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q5s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q5s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q14s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q14s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q15s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q15s16))); - } - return; -} - -void vpx_idct16x16_10_add_neon_pass1( - int16_t *in, - int16_t *out, - int output_stride) { - int16x4_t d4s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - uint64x1_t d4u64, d5u64, d18u64, d19u64, d20u64, d21u64, d22u64, d23u64; - uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; - int16x8_t q0s16, q1s16, q2s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q6s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q15s32; - int16x8x2_t q0x2s16; - - q0x2s16 = vld2q_s16(in); - q8s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q9s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q10s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q11s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q12s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q13s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q14s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // stage 3 - q0s16 = vdupq_n_s16(cospi_28_64 * 2); - q1s16 = vdupq_n_s16(cospi_4_64 * 2); - - q4s16 = vqrdmulhq_s16(q9s16, q0s16); - q7s16 = vqrdmulhq_s16(q9s16, q1s16); - - // stage 4 - q1s16 = vdupq_n_s16(cospi_16_64 * 2); - d4s16 = vdup_n_s16(cospi_16_64); - - q8s16 = vqrdmulhq_s16(q8s16, q1s16); - - d8s16 = vget_low_s16(q4s16); - d9s16 = vget_high_s16(q4s16); - d14s16 = vget_low_s16(q7s16); - d15s16 = vget_high_s16(q7s16); - q9s32 = vmull_s16(d14s16, d4s16); - q10s32 = vmull_s16(d15s16, d4s16); - q12s32 = vmull_s16(d9s16, d4s16); - q11s32 = vmull_s16(d8s16, d4s16); - - q15s32 = vsubq_s32(q10s32, q12s32); - q6s32 = vsubq_s32(q9s32, q11s32); - q9s32 = vaddq_s32(q9s32, q11s32); - q10s32 = vaddq_s32(q10s32, q12s32); - - d11s16 = vqrshrn_n_s32(q15s32, 14); - d10s16 = vqrshrn_n_s32(q6s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q10s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 6 - q2s16 = vaddq_s16(q8s16, q7s16); - q9s16 = vaddq_s16(q8s16, q6s16); - q10s16 = vaddq_s16(q8s16, q5s16); - q11s16 = vaddq_s16(q8s16, q4s16); - q12s16 = vsubq_s16(q8s16, q4s16); - q13s16 = vsubq_s16(q8s16, q5s16); - q14s16 = vsubq_s16(q8s16, q6s16); - q15s16 = vsubq_s16(q8s16, q7s16); - - d4u64 = vreinterpret_u64_s16(vget_low_s16(q2s16)); - d5u64 = vreinterpret_u64_s16(vget_high_s16(q2s16)); - d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); - d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); - d20u64 = vreinterpret_u64_s16(vget_low_s16(q10s16)); - d21u64 = vreinterpret_u64_s16(vget_high_s16(q10s16)); - d22u64 = vreinterpret_u64_s16(vget_low_s16(q11s16)); - d23u64 = vreinterpret_u64_s16(vget_high_s16(q11s16)); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); - d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); - d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); - d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); - - // store the data - output_stride >>= 1; // output_stride / 2, out is int16_t - vst1_u64((uint64_t *)out, d4u64); - out += output_stride; - vst1_u64((uint64_t *)out, d5u64); - out += output_stride; - vst1_u64((uint64_t *)out, d18u64); - out += output_stride; - vst1_u64((uint64_t *)out, d19u64); - out += output_stride; - vst1_u64((uint64_t *)out, d20u64); - out += output_stride; - vst1_u64((uint64_t *)out, d21u64); - out += output_stride; - vst1_u64((uint64_t *)out, d22u64); - out += output_stride; - vst1_u64((uint64_t *)out, d23u64); - out += output_stride; - vst1_u64((uint64_t *)out, d24u64); - out += output_stride; - vst1_u64((uint64_t *)out, d25u64); - out += output_stride; - vst1_u64((uint64_t *)out, d26u64); - out += output_stride; - vst1_u64((uint64_t *)out, d27u64); - out += output_stride; - vst1_u64((uint64_t *)out, d28u64); - out += output_stride; - vst1_u64((uint64_t *)out, d29u64); - out += output_stride; - vst1_u64((uint64_t *)out, d30u64); - out += output_stride; - vst1_u64((uint64_t *)out, d31u64); - return; -} - -void vpx_idct16x16_10_add_neon_pass2( - int16_t *src, - int16_t *out, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d30s16, d31s16; - uint64x1_t d4u64, d5u64, d6u64, d7u64, d8u64, d9u64, d10u64, d11u64; - uint64x1_t d16u64, d17u64, d18u64, d19u64; - uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32; - int16x8x2_t q0x2s16; - (void)skip_adding; - (void)dest; - (void)dest_stride; - - q0x2s16 = vld2q_s16(src); - q8s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q9s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q10s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q11s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q12s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q13s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q14s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // stage 3 - q6s16 = vdupq_n_s16(cospi_30_64 * 2); - q0s16 = vqrdmulhq_s16(q8s16, q6s16); - q6s16 = vdupq_n_s16(cospi_2_64 * 2); - q7s16 = vqrdmulhq_s16(q8s16, q6s16); - - q15s16 = vdupq_n_s16(-cospi_26_64 * 2); - q14s16 = vdupq_n_s16(cospi_6_64 * 2); - q3s16 = vqrdmulhq_s16(q9s16, q15s16); - q4s16 = vqrdmulhq_s16(q9s16, q14s16); - - // stage 4 - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - d6s16 = vget_low_s16(q3s16); - d7s16 = vget_high_s16(q3s16); - d8s16 = vget_low_s16(q4s16); - d9s16 = vget_high_s16(q4s16); - d14s16 = vget_low_s16(q7s16); - d15s16 = vget_high_s16(q7s16); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - q12s32 = vmull_s16(d14s16, d31s16); - q5s32 = vmull_s16(d15s16, d31s16); - q2s32 = vmull_s16(d0s16, d31s16); - q11s32 = vmull_s16(d1s16, d31s16); - - q12s32 = vmlsl_s16(q12s32, d0s16, d30s16); - q5s32 = vmlsl_s16(q5s32, d1s16, d30s16); - q2s32 = vmlal_s16(q2s32, d14s16, d30s16); - q11s32 = vmlal_s16(q11s32, d15s16, d30s16); - - d2s16 = vqrshrn_n_s32(q12s32, 14); - d3s16 = vqrshrn_n_s32(q5s32, 14); - d12s16 = vqrshrn_n_s32(q2s32, 14); - d13s16 = vqrshrn_n_s32(q11s32, 14); - q1s16 = vcombine_s16(d2s16, d3s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - d30s16 = vdup_n_s16(-cospi_8_64); - q10s32 = vmull_s16(d8s16, d30s16); - q13s32 = vmull_s16(d9s16, d30s16); - q8s32 = vmull_s16(d6s16, d30s16); - q9s32 = vmull_s16(d7s16, d30s16); - - q10s32 = vmlsl_s16(q10s32, d6s16, d31s16); - q13s32 = vmlsl_s16(q13s32, d7s16, d31s16); - q8s32 = vmlal_s16(q8s32, d8s16, d31s16); - q9s32 = vmlal_s16(q9s32, d9s16, d31s16); - - d4s16 = vqrshrn_n_s32(q10s32, 14); - d5s16 = vqrshrn_n_s32(q13s32, 14); - d10s16 = vqrshrn_n_s32(q8s32, 14); - d11s16 = vqrshrn_n_s32(q9s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - // stage 5 - q8s16 = vaddq_s16(q0s16, q3s16); - q9s16 = vaddq_s16(q1s16, q2s16); - q10s16 = vsubq_s16(q1s16, q2s16); - q11s16 = vsubq_s16(q0s16, q3s16); - q12s16 = vsubq_s16(q7s16, q4s16); - q13s16 = vsubq_s16(q6s16, q5s16); - q14s16 = vaddq_s16(q6s16, q5s16); - q15s16 = vaddq_s16(q7s16, q4s16); - - // stage 6 - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - - d14s16 = vdup_n_s16(cospi_16_64); - q3s32 = vmull_s16(d26s16, d14s16); - q4s32 = vmull_s16(d27s16, d14s16); - q0s32 = vmull_s16(d20s16, d14s16); - q1s32 = vmull_s16(d21s16, d14s16); - - q5s32 = vsubq_s32(q3s32, q0s32); - q6s32 = vsubq_s32(q4s32, q1s32); - q0s32 = vaddq_s32(q3s32, q0s32); - q4s32 = vaddq_s32(q4s32, q1s32); - - d4s16 = vqrshrn_n_s32(q5s32, 14); - d5s16 = vqrshrn_n_s32(q6s32, 14); - d10s16 = vqrshrn_n_s32(q0s32, 14); - d11s16 = vqrshrn_n_s32(q4s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q0s32 = vmull_s16(d22s16, d14s16); - q1s32 = vmull_s16(d23s16, d14s16); - q13s32 = vmull_s16(d24s16, d14s16); - q6s32 = vmull_s16(d25s16, d14s16); - - q10s32 = vsubq_s32(q13s32, q0s32); - q4s32 = vsubq_s32(q6s32, q1s32); - q13s32 = vaddq_s32(q13s32, q0s32); - q6s32 = vaddq_s32(q6s32, q1s32); - - d6s16 = vqrshrn_n_s32(q10s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - d8s16 = vqrshrn_n_s32(q13s32, 14); - d9s16 = vqrshrn_n_s32(q6s32, 14); - q3s16 = vcombine_s16(d6s16, d7s16); - q4s16 = vcombine_s16(d8s16, d9s16); - - // stage 7 - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q15s16); - q13s16 = vaddq_s16(q1s16, q14s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q14s16 = vsubq_s16(q1s16, q14s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q10s16, q5s16); - q13s16 = vaddq_s16(q11s16, q4s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q4s16 = vsubq_s16(q11s16, q4s16); - q5s16 = vsubq_s16(q10s16, q5s16); - - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q3s16); - q13s16 = vaddq_s16(q1s16, q2s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q2s16 = vsubq_s16(q1s16, q2s16); - q3s16 = vsubq_s16(q0s16, q3s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - q12s16 = vaddq_s16(q10s16, q9s16); - q13s16 = vaddq_s16(q11s16, q8s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q8s16 = vsubq_s16(q11s16, q8s16); - q9s16 = vsubq_s16(q10s16, q9s16); - - d4u64 = vreinterpret_u64_s16(vget_low_s16(q2s16)); - d5u64 = vreinterpret_u64_s16(vget_high_s16(q2s16)); - d6u64 = vreinterpret_u64_s16(vget_low_s16(q3s16)); - d7u64 = vreinterpret_u64_s16(vget_high_s16(q3s16)); - d8u64 = vreinterpret_u64_s16(vget_low_s16(q4s16)); - d9u64 = vreinterpret_u64_s16(vget_high_s16(q4s16)); - d10u64 = vreinterpret_u64_s16(vget_low_s16(q5s16)); - d11u64 = vreinterpret_u64_s16(vget_high_s16(q5s16)); - d16u64 = vreinterpret_u64_s16(vget_low_s16(q8s16)); - d17u64 = vreinterpret_u64_s16(vget_high_s16(q8s16)); - d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); - d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); - d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); - d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); - d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); - d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); - - vst1_u64((uint64_t *)out, d16u64); - out += 4; - vst1_u64((uint64_t *)out, d17u64); - out += 12; - vst1_u64((uint64_t *)out, d18u64); - out += 4; - vst1_u64((uint64_t *)out, d19u64); - out += 12; - vst1_u64((uint64_t *)out, d4u64); - out += 4; - vst1_u64((uint64_t *)out, d5u64); - out += 12; - vst1_u64((uint64_t *)out, d6u64); - out += 4; - vst1_u64((uint64_t *)out, d7u64); - out += 12; - vst1_u64((uint64_t *)out, d8u64); - out += 4; - vst1_u64((uint64_t *)out, d9u64); - out += 12; - vst1_u64((uint64_t *)out, d10u64); - out += 4; - vst1_u64((uint64_t *)out, d11u64); - out += 12; - vst1_u64((uint64_t *)out, d28u64); - out += 4; - vst1_u64((uint64_t *)out, d29u64); - out += 12; - vst1_u64((uint64_t *)out, d30u64); - out += 4; - vst1_u64((uint64_t *)out, d31u64); - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c deleted file mode 100644 index 352979aa16..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_dsp/vpx_dsp_common.h" - -void vpx_idct16x16_256_add_neon_pass1(const int16_t *input, - int16_t *output, - int output_stride); -void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); -void vpx_idct16x16_10_add_neon_pass1(const int16_t *input, - int16_t *output, - int output_stride); -void vpx_idct16x16_10_add_neon_pass2(const int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); - -#if HAVE_NEON_ASM -/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ -extern void vpx_push_neon(int64_t *store); -extern void vpx_pop_neon(int64_t *store); -#endif // HAVE_NEON_ASM - -void vpx_idct16x16_256_add_neon(const int16_t *input, - uint8_t *dest, int dest_stride) { -#if HAVE_NEON_ASM - int64_t store_reg[8]; -#endif - int16_t pass1_output[16*16] = {0}; - int16_t row_idct_output[16*16] = {0}; - -#if HAVE_NEON_ASM - // save d8-d15 register values. - vpx_push_neon(store_reg); -#endif - - /* Parallel idct on the upper 8 rows */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7 - // which will be saved into row_idct_output. - vpx_idct16x16_256_add_neon_pass2(input+1, - row_idct_output, - pass1_output, - 0, - dest, - dest_stride); - - /* Parallel idct on the lower 8 rows */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7 - // which will be saved into row_idct_output. - vpx_idct16x16_256_add_neon_pass2(input+8*16+1, - row_idct_output+8, - pass1_output, - 0, - dest, - dest_stride); - - /* Parallel idct on the left 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, - row_idct_output, - pass1_output, - 1, - dest, - dest_stride); - - /* Parallel idct on the right 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, - row_idct_output+8, - pass1_output, - 1, - dest+8, - dest_stride); - -#if HAVE_NEON_ASM - // restore d8-d15 register values. - vpx_pop_neon(store_reg); -#endif - - return; -} - -void vpx_idct16x16_10_add_neon(const int16_t *input, - uint8_t *dest, int dest_stride) { -#if HAVE_NEON_ASM - int64_t store_reg[8]; -#endif - int16_t pass1_output[16*16] = {0}; - int16_t row_idct_output[16*16] = {0}; - -#if HAVE_NEON_ASM - // save d8-d15 register values. - vpx_push_neon(store_reg); -#endif - - /* Parallel idct on the upper 8 rows */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7 - // which will be saved into row_idct_output. - vpx_idct16x16_10_add_neon_pass2(input+1, - row_idct_output, - pass1_output, - 0, - dest, - dest_stride); - - /* Skip Parallel idct on the lower 8 rows as they are all 0s */ - - /* Parallel idct on the left 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, - row_idct_output, - pass1_output, - 1, - dest, - dest_stride); - - /* Parallel idct on the right 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, - row_idct_output+8, - pass1_output, - 1, - dest+8, - dest_stride); - -#if HAVE_NEON_ASM - // restore d8-d15 register values. - vpx_pop_neon(store_reg); -#endif - - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c deleted file mode 100644 index c25c0c4a5c..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -static INLINE void LD_16x8( - uint8_t *d, - int d_stride, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - *q8u8 = vld1q_u8(d); - d += d_stride; - *q9u8 = vld1q_u8(d); - d += d_stride; - *q10u8 = vld1q_u8(d); - d += d_stride; - *q11u8 = vld1q_u8(d); - d += d_stride; - *q12u8 = vld1q_u8(d); - d += d_stride; - *q13u8 = vld1q_u8(d); - d += d_stride; - *q14u8 = vld1q_u8(d); - d += d_stride; - *q15u8 = vld1q_u8(d); - return; -} - -static INLINE void ADD_DIFF_16x8( - uint8x16_t qdiffu8, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - *q8u8 = vqaddq_u8(*q8u8, qdiffu8); - *q9u8 = vqaddq_u8(*q9u8, qdiffu8); - *q10u8 = vqaddq_u8(*q10u8, qdiffu8); - *q11u8 = vqaddq_u8(*q11u8, qdiffu8); - *q12u8 = vqaddq_u8(*q12u8, qdiffu8); - *q13u8 = vqaddq_u8(*q13u8, qdiffu8); - *q14u8 = vqaddq_u8(*q14u8, qdiffu8); - *q15u8 = vqaddq_u8(*q15u8, qdiffu8); - return; -} - -static INLINE void SUB_DIFF_16x8( - uint8x16_t qdiffu8, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - *q8u8 = vqsubq_u8(*q8u8, qdiffu8); - *q9u8 = vqsubq_u8(*q9u8, qdiffu8); - *q10u8 = vqsubq_u8(*q10u8, qdiffu8); - *q11u8 = vqsubq_u8(*q11u8, qdiffu8); - *q12u8 = vqsubq_u8(*q12u8, qdiffu8); - *q13u8 = vqsubq_u8(*q13u8, qdiffu8); - *q14u8 = vqsubq_u8(*q14u8, qdiffu8); - *q15u8 = vqsubq_u8(*q15u8, qdiffu8); - return; -} - -static INLINE void ST_16x8( - uint8_t *d, - int d_stride, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - vst1q_u8(d, *q8u8); - d += d_stride; - vst1q_u8(d, *q9u8); - d += d_stride; - vst1q_u8(d, *q10u8); - d += d_stride; - vst1q_u8(d, *q11u8); - d += d_stride; - vst1q_u8(d, *q12u8); - d += d_stride; - vst1q_u8(d, *q13u8); - d += d_stride; - vst1q_u8(d, *q14u8); - d += d_stride; - vst1q_u8(d, *q15u8); - return; -} - -void vpx_idct32x32_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int i, j, dest_stride8; - uint8_t *d; - int16_t a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 6); - - dest_stride8 = dest_stride * 8; - if (a1 >= 0) { // diff_positive_32_32 - a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1; - q0u8 = vdupq_n_u8(a1); - for (i = 0; i < 2; i++, dest += 16) { // diff_positive_32_32_loop - d = dest; - for (j = 0; j < 4; j++) { - LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - ADD_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - d += dest_stride8; - } - } - } else { // diff_negative_32_32 - a1 = -a1; - a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1; - q0u8 = vdupq_n_u8(a1); - for (i = 0; i < 2; i++, dest += 16) { // diff_negative_32_32_loop - d = dest; - for (j = 0; j < 4; j++) { - LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - SUB_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - d += dest_stride8; - } - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c deleted file mode 100644 index 025437eb96..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c +++ /dev/null @@ -1,719 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" - -#define LOAD_FROM_TRANSPOSED(prev, first, second) \ - q14s16 = vld1q_s16(trans_buf + first * 8); \ - q13s16 = vld1q_s16(trans_buf + second * 8); - -#define LOAD_FROM_OUTPUT(prev, first, second, qA, qB) \ - qA = vld1q_s16(out + first * 32); \ - qB = vld1q_s16(out + second * 32); - -#define STORE_IN_OUTPUT(prev, first, second, qA, qB) \ - vst1q_s16(out + first * 32, qA); \ - vst1q_s16(out + second * 32, qB); - -#define STORE_COMBINE_CENTER_RESULTS(r10, r9) \ - __STORE_COMBINE_CENTER_RESULTS(r10, r9, stride, \ - q6s16, q7s16, q8s16, q9s16); -static INLINE void __STORE_COMBINE_CENTER_RESULTS( - uint8_t *p1, - uint8_t *p2, - int stride, - int16x8_t q6s16, - int16x8_t q7s16, - int16x8_t q8s16, - int16x8_t q9s16) { - int16x4_t d8s16, d9s16, d10s16, d11s16; - - d8s16 = vld1_s16((int16_t *)p1); - p1 += stride; - d11s16 = vld1_s16((int16_t *)p2); - p2 -= stride; - d9s16 = vld1_s16((int16_t *)p1); - d10s16 = vld1_s16((int16_t *)p2); - - q7s16 = vrshrq_n_s16(q7s16, 6); - q8s16 = vrshrq_n_s16(q8s16, 6); - q9s16 = vrshrq_n_s16(q9s16, 6); - q6s16 = vrshrq_n_s16(q6s16, 6); - - q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), - vreinterpret_u8_s16(d9s16))); - q8s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_s16(d10s16))); - q9s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_s16(d11s16))); - q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), - vreinterpret_u8_s16(d8s16))); - - d9s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16)); - d10s16 = vreinterpret_s16_u8(vqmovun_s16(q8s16)); - d11s16 = vreinterpret_s16_u8(vqmovun_s16(q9s16)); - d8s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16)); - - vst1_s16((int16_t *)p1, d9s16); - p1 -= stride; - vst1_s16((int16_t *)p2, d10s16); - p2 += stride; - vst1_s16((int16_t *)p1, d8s16); - vst1_s16((int16_t *)p2, d11s16); - return; -} - -#define STORE_COMBINE_EXTREME_RESULTS(r7, r6); \ - __STORE_COMBINE_EXTREME_RESULTS(r7, r6, stride, \ - q4s16, q5s16, q6s16, q7s16); -static INLINE void __STORE_COMBINE_EXTREME_RESULTS( - uint8_t *p1, - uint8_t *p2, - int stride, - int16x8_t q4s16, - int16x8_t q5s16, - int16x8_t q6s16, - int16x8_t q7s16) { - int16x4_t d4s16, d5s16, d6s16, d7s16; - - d4s16 = vld1_s16((int16_t *)p1); - p1 += stride; - d7s16 = vld1_s16((int16_t *)p2); - p2 -= stride; - d5s16 = vld1_s16((int16_t *)p1); - d6s16 = vld1_s16((int16_t *)p2); - - q5s16 = vrshrq_n_s16(q5s16, 6); - q6s16 = vrshrq_n_s16(q6s16, 6); - q7s16 = vrshrq_n_s16(q7s16, 6); - q4s16 = vrshrq_n_s16(q4s16, 6); - - q5s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q5s16), - vreinterpret_u8_s16(d5s16))); - q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), - vreinterpret_u8_s16(d6s16))); - q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), - vreinterpret_u8_s16(d7s16))); - q4s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q4s16), - vreinterpret_u8_s16(d4s16))); - - d5s16 = vreinterpret_s16_u8(vqmovun_s16(q5s16)); - d6s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16)); - d7s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16)); - d4s16 = vreinterpret_s16_u8(vqmovun_s16(q4s16)); - - vst1_s16((int16_t *)p1, d5s16); - p1 -= stride; - vst1_s16((int16_t *)p2, d6s16); - p2 += stride; - vst1_s16((int16_t *)p2, d7s16); - vst1_s16((int16_t *)p1, d4s16); - return; -} - -#define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \ - DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB); -static INLINE void DO_BUTTERFLY( - int16x8_t q14s16, - int16x8_t q13s16, - int16_t first_const, - int16_t second_const, - int16x8_t *qAs16, - int16x8_t *qBs16) { - int16x4_t d30s16, d31s16; - int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q15s32; - int16x4_t dCs16, dDs16, dAs16, dBs16; - - dCs16 = vget_low_s16(q14s16); - dDs16 = vget_high_s16(q14s16); - dAs16 = vget_low_s16(q13s16); - dBs16 = vget_high_s16(q13s16); - - d30s16 = vdup_n_s16(first_const); - d31s16 = vdup_n_s16(second_const); - - q8s32 = vmull_s16(dCs16, d30s16); - q10s32 = vmull_s16(dAs16, d31s16); - q9s32 = vmull_s16(dDs16, d30s16); - q11s32 = vmull_s16(dBs16, d31s16); - q12s32 = vmull_s16(dCs16, d31s16); - - q8s32 = vsubq_s32(q8s32, q10s32); - q9s32 = vsubq_s32(q9s32, q11s32); - - q10s32 = vmull_s16(dDs16, d31s16); - q11s32 = vmull_s16(dAs16, d30s16); - q15s32 = vmull_s16(dBs16, d30s16); - - q11s32 = vaddq_s32(q12s32, q11s32); - q10s32 = vaddq_s32(q10s32, q15s32); - - *qAs16 = vcombine_s16(vqrshrn_n_s32(q8s32, 14), - vqrshrn_n_s32(q9s32, 14)); - *qBs16 = vcombine_s16(vqrshrn_n_s32(q11s32, 14), - vqrshrn_n_s32(q10s32, 14)); - return; -} - -static INLINE void idct32_transpose_pair( - int16_t *input, - int16_t *t_buf) { - int16_t *in; - int i; - const int stride = 32; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - for (i = 0; i < 4; i++, input += 8) { - in = input; - q8s16 = vld1q_s16(in); - in += stride; - q9s16 = vld1q_s16(in); - in += stride; - q10s16 = vld1q_s16(in); - in += stride; - q11s16 = vld1q_s16(in); - in += stride; - q12s16 = vld1q_s16(in); - in += stride; - q13s16 = vld1q_s16(in); - in += stride; - q14s16 = vld1q_s16(in); - in += stride; - q15s16 = vld1q_s16(in); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - d30s16 = vget_low_s16(q15s16); - d31s16 = vget_high_s16(q15s16); - - q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - q12s16 = vcombine_s16(d17s16, d25s16); - q13s16 = vcombine_s16(d19s16, d27s16); - q14s16 = vcombine_s16(d21s16, d29s16); - q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), - vreinterpretq_s32_s16(q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q9s16), - vreinterpretq_s32_s16(q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q12s16), - vreinterpretq_s32_s16(q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q13s16), - vreinterpretq_s32_s16(q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - vst1q_s16(t_buf, q0x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q0x2s16.val[1]); - t_buf += 8; - vst1q_s16(t_buf, q1x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q1x2s16.val[1]); - t_buf += 8; - vst1q_s16(t_buf, q2x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q2x2s16.val[1]); - t_buf += 8; - vst1q_s16(t_buf, q3x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q3x2s16.val[1]); - t_buf += 8; - } - return; -} - -static INLINE void idct32_bands_end_1st_pass( - int16_t *out, - int16x8_t q2s16, - int16x8_t q3s16, - int16x8_t q6s16, - int16x8_t q7s16, - int16x8_t q8s16, - int16x8_t q9s16, - int16x8_t q10s16, - int16x8_t q11s16, - int16x8_t q12s16, - int16x8_t q13s16, - int16x8_t q14s16, - int16x8_t q15s16) { - int16x8_t q0s16, q1s16, q4s16, q5s16; - - STORE_IN_OUTPUT(17, 16, 17, q6s16, q7s16); - STORE_IN_OUTPUT(17, 14, 15, q8s16, q9s16); - - LOAD_FROM_OUTPUT(15, 30, 31, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(31, 30, 31, q6s16, q7s16); - STORE_IN_OUTPUT(31, 0, 1, q4s16, q5s16); - - LOAD_FROM_OUTPUT(1, 12, 13, q0s16, q1s16); - q2s16 = vaddq_s16(q10s16, q1s16); - q3s16 = vaddq_s16(q11s16, q0s16); - q4s16 = vsubq_s16(q11s16, q0s16); - q5s16 = vsubq_s16(q10s16, q1s16); - - LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16); - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_IN_OUTPUT(19, 18, 19, q6s16, q7s16); - STORE_IN_OUTPUT(19, 12, 13, q8s16, q9s16); - - LOAD_FROM_OUTPUT(13, 28, 29, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(29, 28, 29, q6s16, q7s16); - STORE_IN_OUTPUT(29, 2, 3, q4s16, q5s16); - - LOAD_FROM_OUTPUT(3, 10, 11, q0s16, q1s16); - q2s16 = vaddq_s16(q12s16, q1s16); - q3s16 = vaddq_s16(q13s16, q0s16); - q4s16 = vsubq_s16(q13s16, q0s16); - q5s16 = vsubq_s16(q12s16, q1s16); - - LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16); - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_IN_OUTPUT(21, 20, 21, q6s16, q7s16); - STORE_IN_OUTPUT(21, 10, 11, q8s16, q9s16); - - LOAD_FROM_OUTPUT(11, 26, 27, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(27, 26, 27, q6s16, q7s16); - STORE_IN_OUTPUT(27, 4, 5, q4s16, q5s16); - - LOAD_FROM_OUTPUT(5, 8, 9, q0s16, q1s16); - q2s16 = vaddq_s16(q14s16, q1s16); - q3s16 = vaddq_s16(q15s16, q0s16); - q4s16 = vsubq_s16(q15s16, q0s16); - q5s16 = vsubq_s16(q14s16, q1s16); - - LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16); - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_IN_OUTPUT(23, 22, 23, q6s16, q7s16); - STORE_IN_OUTPUT(23, 8, 9, q8s16, q9s16); - - LOAD_FROM_OUTPUT(9, 24, 25, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(25, 24, 25, q6s16, q7s16); - STORE_IN_OUTPUT(25, 6, 7, q4s16, q5s16); - return; -} - -static INLINE void idct32_bands_end_2nd_pass( - int16_t *out, - uint8_t *dest, - int stride, - int16x8_t q2s16, - int16x8_t q3s16, - int16x8_t q6s16, - int16x8_t q7s16, - int16x8_t q8s16, - int16x8_t q9s16, - int16x8_t q10s16, - int16x8_t q11s16, - int16x8_t q12s16, - int16x8_t q13s16, - int16x8_t q14s16, - int16x8_t q15s16) { - uint8_t *r6 = dest + 31 * stride; - uint8_t *r7 = dest/* + 0 * stride*/; - uint8_t *r9 = dest + 15 * stride; - uint8_t *r10 = dest + 16 * stride; - int str2 = stride << 1; - int16x8_t q0s16, q1s16, q4s16, q5s16; - - STORE_COMBINE_CENTER_RESULTS(r10, r9); - r10 += str2; r9 -= str2; - - LOAD_FROM_OUTPUT(17, 30, 31, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - r7 += str2; r6 -= str2; - - LOAD_FROM_OUTPUT(31, 12, 13, q0s16, q1s16) - q2s16 = vaddq_s16(q10s16, q1s16); - q3s16 = vaddq_s16(q11s16, q0s16); - q4s16 = vsubq_s16(q11s16, q0s16); - q5s16 = vsubq_s16(q10s16, q1s16); - - LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_COMBINE_CENTER_RESULTS(r10, r9); - r10 += str2; r9 -= str2; - - LOAD_FROM_OUTPUT(19, 28, 29, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - r7 += str2; r6 -= str2; - - LOAD_FROM_OUTPUT(29, 10, 11, q0s16, q1s16) - q2s16 = vaddq_s16(q12s16, q1s16); - q3s16 = vaddq_s16(q13s16, q0s16); - q4s16 = vsubq_s16(q13s16, q0s16); - q5s16 = vsubq_s16(q12s16, q1s16); - - LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_COMBINE_CENTER_RESULTS(r10, r9); - r10 += str2; r9 -= str2; - - LOAD_FROM_OUTPUT(21, 26, 27, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - r7 += str2; r6 -= str2; - - LOAD_FROM_OUTPUT(27, 8, 9, q0s16, q1s16) - q2s16 = vaddq_s16(q14s16, q1s16); - q3s16 = vaddq_s16(q15s16, q0s16); - q4s16 = vsubq_s16(q15s16, q0s16); - q5s16 = vsubq_s16(q14s16, q1s16); - - LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_COMBINE_CENTER_RESULTS(r10, r9); - - LOAD_FROM_OUTPUT(23, 24, 25, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - return; -} - -void vpx_idct32x32_1024_add_neon( - int16_t *input, - uint8_t *dest, - int stride) { - int i, idct32_pass_loop; - int16_t trans_buf[32 * 8]; - int16_t pass1[32 * 32]; - int16_t pass2[32 * 32]; - int16_t *out; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - - for (idct32_pass_loop = 0, out = pass1; - idct32_pass_loop < 2; - idct32_pass_loop++, - input = pass1, // the input of pass2 is the result of pass1 - out = pass2) { - for (i = 0; - i < 4; i++, - input += 32 * 8, out += 8) { // idct32_bands_loop - idct32_transpose_pair(input, trans_buf); - - // ----------------------------------------- - // BLOCK A: 16-19,28-31 - // ----------------------------------------- - // generate 16,17,30,31 - // part of stage 1 - LOAD_FROM_TRANSPOSED(0, 1, 31) - DO_BUTTERFLY_STD(cospi_31_64, cospi_1_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(31, 17, 15) - DO_BUTTERFLY_STD(cospi_15_64, cospi_17_64, &q1s16, &q3s16) - // part of stage 2 - q4s16 = vaddq_s16(q0s16, q1s16); - q13s16 = vsubq_s16(q0s16, q1s16); - q6s16 = vaddq_s16(q2s16, q3s16); - q14s16 = vsubq_s16(q2s16, q3s16); - // part of stage 3 - DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q5s16, &q7s16) - - // generate 18,19,28,29 - // part of stage 1 - LOAD_FROM_TRANSPOSED(15, 9, 23) - DO_BUTTERFLY_STD(cospi_23_64, cospi_9_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(23, 25, 7) - DO_BUTTERFLY_STD(cospi_7_64, cospi_25_64, &q1s16, &q3s16) - // part of stage 2 - q13s16 = vsubq_s16(q3s16, q2s16); - q3s16 = vaddq_s16(q3s16, q2s16); - q14s16 = vsubq_s16(q1s16, q0s16); - q2s16 = vaddq_s16(q1s16, q0s16); - // part of stage 3 - DO_BUTTERFLY_STD(-cospi_4_64, -cospi_28_64, &q1s16, &q0s16) - // part of stage 4 - q8s16 = vaddq_s16(q4s16, q2s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q10s16 = vaddq_s16(q7s16, q1s16); - q15s16 = vaddq_s16(q6s16, q3s16); - q13s16 = vsubq_s16(q5s16, q0s16); - q14s16 = vsubq_s16(q7s16, q1s16); - STORE_IN_OUTPUT(0, 16, 31, q8s16, q15s16) - STORE_IN_OUTPUT(31, 17, 30, q9s16, q10s16) - // part of stage 5 - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q0s16, &q1s16) - STORE_IN_OUTPUT(30, 29, 18, q1s16, q0s16) - // part of stage 4 - q13s16 = vsubq_s16(q4s16, q2s16); - q14s16 = vsubq_s16(q6s16, q3s16); - // part of stage 5 - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q4s16, &q6s16) - STORE_IN_OUTPUT(18, 19, 28, q4s16, q6s16) - - // ----------------------------------------- - // BLOCK B: 20-23,24-27 - // ----------------------------------------- - // generate 20,21,26,27 - // part of stage 1 - LOAD_FROM_TRANSPOSED(7, 5, 27) - DO_BUTTERFLY_STD(cospi_27_64, cospi_5_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(27, 21, 11) - DO_BUTTERFLY_STD(cospi_11_64, cospi_21_64, &q1s16, &q3s16) - // part of stage 2 - q13s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q14s16 = vsubq_s16(q2s16, q3s16); - q2s16 = vaddq_s16(q2s16, q3s16); - // part of stage 3 - DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16) - - // generate 22,23,24,25 - // part of stage 1 - LOAD_FROM_TRANSPOSED(11, 13, 19) - DO_BUTTERFLY_STD(cospi_19_64, cospi_13_64, &q5s16, &q7s16) - LOAD_FROM_TRANSPOSED(19, 29, 3) - DO_BUTTERFLY_STD(cospi_3_64, cospi_29_64, &q4s16, &q6s16) - // part of stage 2 - q14s16 = vsubq_s16(q4s16, q5s16); - q5s16 = vaddq_s16(q4s16, q5s16); - q13s16 = vsubq_s16(q6s16, q7s16); - q6s16 = vaddq_s16(q6s16, q7s16); - // part of stage 3 - DO_BUTTERFLY_STD(-cospi_20_64, -cospi_12_64, &q4s16, &q7s16) - // part of stage 4 - q10s16 = vaddq_s16(q7s16, q1s16); - q11s16 = vaddq_s16(q5s16, q0s16); - q12s16 = vaddq_s16(q6s16, q2s16); - q15s16 = vaddq_s16(q4s16, q3s16); - // part of stage 6 - LOAD_FROM_OUTPUT(28, 16, 17, q14s16, q13s16) - q8s16 = vaddq_s16(q14s16, q11s16); - q9s16 = vaddq_s16(q13s16, q10s16); - q13s16 = vsubq_s16(q13s16, q10s16); - q11s16 = vsubq_s16(q14s16, q11s16); - STORE_IN_OUTPUT(17, 17, 16, q9s16, q8s16) - LOAD_FROM_OUTPUT(16, 30, 31, q14s16, q9s16) - q8s16 = vsubq_s16(q9s16, q12s16); - q10s16 = vaddq_s16(q14s16, q15s16); - q14s16 = vsubq_s16(q14s16, q15s16); - q12s16 = vaddq_s16(q9s16, q12s16); - STORE_IN_OUTPUT(31, 30, 31, q10s16, q12s16) - // part of stage 7 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) - STORE_IN_OUTPUT(31, 25, 22, q14s16, q13s16) - q13s16 = q11s16; - q14s16 = q8s16; - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) - STORE_IN_OUTPUT(22, 24, 23, q14s16, q13s16) - // part of stage 4 - q14s16 = vsubq_s16(q5s16, q0s16); - q13s16 = vsubq_s16(q6s16, q2s16); - DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q5s16, &q6s16); - q14s16 = vsubq_s16(q7s16, q1s16); - q13s16 = vsubq_s16(q4s16, q3s16); - DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q0s16, &q1s16); - // part of stage 6 - LOAD_FROM_OUTPUT(23, 18, 19, q14s16, q13s16) - q8s16 = vaddq_s16(q14s16, q1s16); - q9s16 = vaddq_s16(q13s16, q6s16); - q13s16 = vsubq_s16(q13s16, q6s16); - q1s16 = vsubq_s16(q14s16, q1s16); - STORE_IN_OUTPUT(19, 18, 19, q8s16, q9s16) - LOAD_FROM_OUTPUT(19, 28, 29, q8s16, q9s16) - q14s16 = vsubq_s16(q8s16, q5s16); - q10s16 = vaddq_s16(q8s16, q5s16); - q11s16 = vaddq_s16(q9s16, q0s16); - q0s16 = vsubq_s16(q9s16, q0s16); - STORE_IN_OUTPUT(29, 28, 29, q10s16, q11s16) - // part of stage 7 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) - STORE_IN_OUTPUT(29, 20, 27, q13s16, q14s16) - DO_BUTTERFLY(q0s16, q1s16, cospi_16_64, cospi_16_64, - &q1s16, &q0s16); - STORE_IN_OUTPUT(27, 21, 26, q1s16, q0s16) - - // ----------------------------------------- - // BLOCK C: 8-10,11-15 - // ----------------------------------------- - // generate 8,9,14,15 - // part of stage 2 - LOAD_FROM_TRANSPOSED(3, 2, 30) - DO_BUTTERFLY_STD(cospi_30_64, cospi_2_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(30, 18, 14) - DO_BUTTERFLY_STD(cospi_14_64, cospi_18_64, &q1s16, &q3s16) - // part of stage 3 - q13s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q14s16 = vsubq_s16(q2s16, q3s16); - q2s16 = vaddq_s16(q2s16, q3s16); - // part of stage 4 - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q1s16, &q3s16) - - // generate 10,11,12,13 - // part of stage 2 - LOAD_FROM_TRANSPOSED(14, 10, 22) - DO_BUTTERFLY_STD(cospi_22_64, cospi_10_64, &q5s16, &q7s16) - LOAD_FROM_TRANSPOSED(22, 26, 6) - DO_BUTTERFLY_STD(cospi_6_64, cospi_26_64, &q4s16, &q6s16) - // part of stage 3 - q14s16 = vsubq_s16(q4s16, q5s16); - q5s16 = vaddq_s16(q4s16, q5s16); - q13s16 = vsubq_s16(q6s16, q7s16); - q6s16 = vaddq_s16(q6s16, q7s16); - // part of stage 4 - DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q4s16, &q7s16) - // part of stage 5 - q8s16 = vaddq_s16(q0s16, q5s16); - q9s16 = vaddq_s16(q1s16, q7s16); - q13s16 = vsubq_s16(q1s16, q7s16); - q14s16 = vsubq_s16(q3s16, q4s16); - q10s16 = vaddq_s16(q3s16, q4s16); - q15s16 = vaddq_s16(q2s16, q6s16); - STORE_IN_OUTPUT(26, 8, 15, q8s16, q15s16) - STORE_IN_OUTPUT(15, 9, 14, q9s16, q10s16) - // part of stage 6 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) - STORE_IN_OUTPUT(14, 13, 10, q3s16, q1s16) - q13s16 = vsubq_s16(q0s16, q5s16); - q14s16 = vsubq_s16(q2s16, q6s16); - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) - STORE_IN_OUTPUT(10, 11, 12, q1s16, q3s16) - - // ----------------------------------------- - // BLOCK D: 0-3,4-7 - // ----------------------------------------- - // generate 4,5,6,7 - // part of stage 3 - LOAD_FROM_TRANSPOSED(6, 4, 28) - DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(28, 20, 12) - DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16) - // part of stage 4 - q13s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q14s16 = vsubq_s16(q2s16, q3s16); - q2s16 = vaddq_s16(q2s16, q3s16); - // part of stage 5 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) - - // generate 0,1,2,3 - // part of stage 4 - LOAD_FROM_TRANSPOSED(12, 0, 16) - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q5s16, &q7s16) - LOAD_FROM_TRANSPOSED(16, 8, 24) - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q14s16, &q6s16) - // part of stage 5 - q4s16 = vaddq_s16(q7s16, q6s16); - q7s16 = vsubq_s16(q7s16, q6s16); - q6s16 = vsubq_s16(q5s16, q14s16); - q5s16 = vaddq_s16(q5s16, q14s16); - // part of stage 6 - q8s16 = vaddq_s16(q4s16, q2s16); - q9s16 = vaddq_s16(q5s16, q3s16); - q10s16 = vaddq_s16(q6s16, q1s16); - q11s16 = vaddq_s16(q7s16, q0s16); - q12s16 = vsubq_s16(q7s16, q0s16); - q13s16 = vsubq_s16(q6s16, q1s16); - q14s16 = vsubq_s16(q5s16, q3s16); - q15s16 = vsubq_s16(q4s16, q2s16); - // part of stage 7 - LOAD_FROM_OUTPUT(12, 14, 15, q0s16, q1s16) - q2s16 = vaddq_s16(q8s16, q1s16); - q3s16 = vaddq_s16(q9s16, q0s16); - q4s16 = vsubq_s16(q9s16, q0s16); - q5s16 = vsubq_s16(q8s16, q1s16); - LOAD_FROM_OUTPUT(15, 16, 17, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - - if (idct32_pass_loop == 0) { - idct32_bands_end_1st_pass(out, - q2s16, q3s16, q6s16, q7s16, q8s16, q9s16, - q10s16, q11s16, q12s16, q13s16, q14s16, q15s16); - } else { - idct32_bands_end_2nd_pass(out, dest, stride, - q2s16, q3s16, q6s16, q7s16, q8s16, q9s16, - q10s16, q11s16, q12s16, q13s16, q14s16, q15s16); - dest += 8; - } - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c deleted file mode 100644 index ea618700c9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vpx_idct4x4_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d6u8; - uint32x2_t d2u32 = vdup_n_u32(0); - uint16x8_t q8u16; - int16x8_t q0s16; - uint8_t *d1, *d2; - int16_t i, a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 4); - - q0s16 = vdupq_n_s16(a1); - - // dc_only_idct_add - d1 = d2 = dest; - for (i = 0; i < 2; i++) { - d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 0); - d1 += dest_stride; - d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q0s16), - vreinterpret_u8_u32(d2u32)); - d6u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - - vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 0); - d2 += dest_stride; - vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 1); - d2 += dest_stride; - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c deleted file mode 100644 index 3c975c99b7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -void vpx_idct4x4_16_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d26u8, d27u8; - uint32x2_t d26u32, d27u32; - uint16x8_t q8u16, q9u16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16; - int16x4_t d22s16, d23s16, d24s16, d26s16, d27s16, d28s16, d29s16; - int16x8_t q8s16, q9s16, q13s16, q14s16; - int32x4_t q1s32, q13s32, q14s32, q15s32; - int16x4x2_t d0x2s16, d1x2s16; - int32x4x2_t q0x2s32; - uint8_t *d; - int16_t cospi_8_64 = 15137; - int16_t cospi_16_64 = 11585; - int16_t cospi_24_64 = 6270; - - d26u32 = d27u32 = vdup_n_u32(0); - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - - d0x2s16 = vtrn_s16(d16s16, d17s16); - d1x2s16 = vtrn_s16(d18s16, d19s16); - q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]); - q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]); - - d20s16 = vdup_n_s16(cospi_8_64); - d21s16 = vdup_n_s16(cospi_16_64); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), - vreinterpretq_s32_s16(q9s16)); - d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - - d22s16 = vdup_n_s16(cospi_24_64); - - // stage 1 - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, d22s16); - q1s32 = vmull_s16(d17s16, d20s16); - q13s32 = vmull_s16(d23s16, d21s16); - q14s32 = vmull_s16(d24s16, d21s16); - - q15s32 = vmlsl_s16(q15s32, d19s16, d20s16); - q1s32 = vmlal_s16(q1s32, d19s16, d22s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q1s32, 14); - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - - // stage 2 - q8s16 = vaddq_s16(q13s16, q14s16); - q9s16 = vsubq_s16(q13s16, q14s16); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_high_s16(q9s16); // vswp d18 d19 - d19s16 = vget_low_s16(q9s16); - - d0x2s16 = vtrn_s16(d16s16, d17s16); - d1x2s16 = vtrn_s16(d18s16, d19s16); - q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]); - q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), - vreinterpretq_s32_s16(q9s16)); - d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - - // do the transform on columns - // stage 1 - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, d22s16); - q1s32 = vmull_s16(d17s16, d20s16); - q13s32 = vmull_s16(d23s16, d21s16); - q14s32 = vmull_s16(d24s16, d21s16); - - q15s32 = vmlsl_s16(q15s32, d19s16, d20s16); - q1s32 = vmlal_s16(q1s32, d19s16, d22s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q1s32, 14); - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - - // stage 2 - q8s16 = vaddq_s16(q13s16, q14s16); - q9s16 = vsubq_s16(q13s16, q14s16); - - q8s16 = vrshrq_n_s16(q8s16, 4); - q9s16 = vrshrq_n_s16(q9s16, 4); - - d = dest; - d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 0); - d += dest_stride; - d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 1); - d += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 1); - d += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 0); - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u32(d26u32)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u32(d27u32)); - - d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - - d = dest; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 0); - d += dest_stride; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 1); - d += dest_stride; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 1); - d += dest_stride; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 0); - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c deleted file mode 100644 index c1b801fad5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vpx_idct8x8_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d2u8, d3u8, d30u8, d31u8; - uint64x1_t d2u64, d3u64, d4u64, d5u64; - uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q0s16; - uint8_t *d1, *d2; - int16_t i, a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 5); - - q0s16 = vdupq_n_s16(a1); - q0u16 = vreinterpretq_u16_s16(q0s16); - - d1 = d2 = dest; - for (i = 0; i < 2; i++) { - d2u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - d4u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - d5u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - - q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); - q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); - q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64)); - q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d31u8)); - d2 += dest_stride; - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c deleted file mode 100644 index 4b2c2a6f83..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" - -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; -} - -static INLINE void IDCT8x8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d26s16, d2s16); - q6s32 = vmull_s16(d27s16, d2s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); - q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q2s32 = vmull_s16(d18s16, d1s16); - q3s32 = vmull_s16(d19s16, d1s16); - q9s32 = vmull_s16(d26s16, d3s16); - q13s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlal_s16(q2s32, d30s16, d0s16); - q3s32 = vmlal_s16(q3s32, d31s16, d0s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q13s32 = vmlal_s16(q13s32, d23s16, d2s16); - - d14s16 = vqrshrn_n_s32(q2s32, 14); - d15s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q13s32, 14); - q6s16 = vcombine_s16(d12s16, d13s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d0s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d0s16); - q3s32 = vmull_s16(d17s16, d0s16); - q13s32 = vmull_s16(d16s16, d0s16); - q15s32 = vmull_s16(d17s16, d0s16); - - q2s32 = vmlal_s16(q2s32, d24s16, d0s16); - q3s32 = vmlal_s16(q3s32, d25s16, d0s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); - q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); - - d0s16 = vdup_n_s16(cospi_24_64); - d1s16 = vdup_n_s16(cospi_8_64); - - d18s16 = vqrshrn_n_s32(q2s32, 14); - d19s16 = vqrshrn_n_s32(q3s32, 14); - d22s16 = vqrshrn_n_s32(q13s32, 14); - d23s16 = vqrshrn_n_s32(q15s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q2s32 = vmull_s16(d20s16, d0s16); - q3s32 = vmull_s16(d21s16, d0s16); - q8s32 = vmull_s16(d20s16, d1s16); - q12s32 = vmull_s16(d21s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); - q8s32 = vmlal_s16(q8s32, d28s16, d0s16); - q12s32 = vmlal_s16(q12s32, d29s16, d0s16); - - d26s16 = vqrshrn_n_s32(q2s32, 14); - d27s16 = vqrshrn_n_s32(q3s32, 14); - d30s16 = vqrshrn_n_s32(q8s32, 14); - d31s16 = vqrshrn_n_s32(q12s32, 14); - *q13s16 = vcombine_s16(d26s16, d27s16); - *q15s16 = vcombine_s16(d30s16, d31s16); - - q0s16 = vaddq_s16(*q9s16, *q15s16); - q1s16 = vaddq_s16(*q11s16, *q13s16); - q2s16 = vsubq_s16(*q11s16, *q13s16); - q3s16 = vsubq_s16(*q9s16, *q15s16); - - *q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - *q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - *q8s16 = vaddq_s16(q0s16, q7s16); - *q9s16 = vaddq_s16(q1s16, q6s16); - *q10s16 = vaddq_s16(q2s16, q5s16); - *q11s16 = vaddq_s16(q3s16, q4s16); - *q12s16 = vsubq_s16(q3s16, q4s16); - *q13s16 = vsubq_s16(q2s16, q5s16); - *q14s16 = vsubq_s16(q1s16, q6s16); - *q15s16 = vsubq_s16(q0s16, q7s16); - return; -} - -void vpx_idct8x8_64_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 16); - q11s16 = vld1q_s16(input + 24); - q12s16 = vld1q_s16(input + 32); - q13s16 = vld1q_s16(input + 40); - q14s16 = vld1q_s16(input + 48); - q15s16 = vld1q_s16(input + 56); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - d1 = d2 = dest; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - return; -} - -void vpx_idct8x8_12_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - int16x4_t d10s16, d11s16, d12s16, d13s16, d16s16; - int16x4_t d26s16, d27s16, d28s16, d29s16; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - int32x4_t q9s32, q10s32, q11s32, q12s32; - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 16); - q11s16 = vld1q_s16(input + 24); - q12s16 = vld1q_s16(input + 32); - q13s16 = vld1q_s16(input + 40); - q14s16 = vld1q_s16(input + 48); - q15s16 = vld1q_s16(input + 56); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // First transform rows - // stage 1 - q0s16 = vdupq_n_s16(cospi_28_64 * 2); - q1s16 = vdupq_n_s16(cospi_4_64 * 2); - - q4s16 = vqrdmulhq_s16(q9s16, q0s16); - - q0s16 = vdupq_n_s16(-cospi_20_64 * 2); - - q7s16 = vqrdmulhq_s16(q9s16, q1s16); - - q1s16 = vdupq_n_s16(cospi_12_64 * 2); - - q5s16 = vqrdmulhq_s16(q11s16, q0s16); - - q0s16 = vdupq_n_s16(cospi_16_64 * 2); - - q6s16 = vqrdmulhq_s16(q11s16, q1s16); - - // stage 2 & stage 3 - even half - q1s16 = vdupq_n_s16(cospi_24_64 * 2); - - q9s16 = vqrdmulhq_s16(q8s16, q0s16); - - q0s16 = vdupq_n_s16(cospi_8_64 * 2); - - q13s16 = vqrdmulhq_s16(q10s16, q1s16); - - q15s16 = vqrdmulhq_s16(q10s16, q0s16); - - // stage 3 -odd half - q0s16 = vaddq_s16(q9s16, q15s16); - q1s16 = vaddq_s16(q9s16, q13s16); - q2s16 = vsubq_s16(q9s16, q13s16); - q3s16 = vsubq_s16(q9s16, q15s16); - - // stage 2 - odd half - q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 4 - q8s16 = vaddq_s16(q0s16, q7s16); - q9s16 = vaddq_s16(q1s16, q6s16); - q10s16 = vaddq_s16(q2s16, q5s16); - q11s16 = vaddq_s16(q3s16, q4s16); - q12s16 = vsubq_s16(q3s16, q4s16); - q13s16 = vsubq_s16(q2s16, q5s16); - q14s16 = vsubq_s16(q1s16, q6s16); - q15s16 = vsubq_s16(q0s16, q7s16); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - d1 = d2 = dest; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c b/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c deleted file mode 100644 index 0a376104d2..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c +++ /dev/null @@ -1,822 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" - -//------------------------------------------------------------------------------ -// DC 4x4 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x8_t A = vld1_u8(above); // top row - const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top - const uint16x4_t p1 = vpadd_u16(p0, p0); - sum_top = vcombine_u16(p1, p1); - } - - if (do_left) { - const uint8x8_t L = vld1_u8(left); // left border - const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left - const uint16x4_t p1 = vpadd_u16(p0, p0); - sum_left = vcombine_u16(p1, p1); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 3); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 2); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 2); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x8_t dc = vdup_lane_u8(dc0, 0); - int i; - for (i = 0; i < 4; ++i) { - vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0); - } - } -} - -void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_4x4(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - dc_4x4(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)left; - dc_4x4(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - (void)left; - dc_4x4(dst, stride, NULL, NULL, 0, 0); -} - -//------------------------------------------------------------------------------ -// DC 8x8 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x8_t A = vld1_u8(above); // top row - const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top - const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x4_t p2 = vpadd_u16(p1, p1); - sum_top = vcombine_u16(p2, p2); - } - - if (do_left) { - const uint8x8_t L = vld1_u8(left); // left border - const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left - const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x4_t p2 = vpadd_u16(p1, p1); - sum_left = vcombine_u16(p2, p2); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 4); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 3); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 3); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x8_t dc = vdup_lane_u8(dc0, 0); - int i; - for (i = 0; i < 8; ++i) { - vst1_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc)); - } - } -} - -void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_8x8(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - dc_8x8(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)left; - dc_8x8(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - (void)left; - dc_8x8(dst, stride, NULL, NULL, 0, 0); -} - -//------------------------------------------------------------------------------ -// DC 16x16 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x16_t A = vld1q_u8(above); // top row - const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top - const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); - const uint16x4_t p2 = vpadd_u16(p1, p1); - const uint16x4_t p3 = vpadd_u16(p2, p2); - sum_top = vcombine_u16(p3, p3); - } - - if (do_left) { - const uint8x16_t L = vld1q_u8(left); // left row - const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left - const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); - const uint16x4_t p2 = vpadd_u16(p1, p1); - const uint16x4_t p3 = vpadd_u16(p2, p2); - sum_left = vcombine_u16(p3, p3); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 5); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 4); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 4); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x16_t dc = vdupq_lane_u8(dc0, 0); - int i; - for (i = 0; i < 16; ++i) { - vst1q_u8(dst + i * stride, dc); - } - } -} - -void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_16x16(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - dc_16x16(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)left; - dc_16x16(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - (void)left; - dc_16x16(dst, stride, NULL, NULL, 0, 0); -} - -//------------------------------------------------------------------------------ -// DC 32x32 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x16_t A0 = vld1q_u8(above); // top row - const uint8x16_t A1 = vld1q_u8(above + 16); - const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top - const uint16x8_t p1 = vpaddlq_u8(A1); - const uint16x8_t p2 = vaddq_u16(p0, p1); - const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); - const uint16x4_t p4 = vpadd_u16(p3, p3); - const uint16x4_t p5 = vpadd_u16(p4, p4); - sum_top = vcombine_u16(p5, p5); - } - - if (do_left) { - const uint8x16_t L0 = vld1q_u8(left); // left row - const uint8x16_t L1 = vld1q_u8(left + 16); - const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left - const uint16x8_t p1 = vpaddlq_u8(L1); - const uint16x8_t p2 = vaddq_u16(p0, p1); - const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); - const uint16x4_t p4 = vpadd_u16(p3, p3); - const uint16x4_t p5 = vpadd_u16(p4, p4); - sum_left = vcombine_u16(p5, p5); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 6); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 5); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 5); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x16_t dc = vdupq_lane_u8(dc0, 0); - int i; - for (i = 0; i < 32; ++i) { - vst1q_u8(dst + i * stride, dc); - vst1q_u8(dst + i * stride + 16, dc); - } - } -} - -void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_32x32(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - dc_32x32(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)left; - dc_32x32(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - (void)left; - dc_32x32(dst, stride, NULL, NULL, 0, 0); -} - -// ----------------------------------------------------------------------------- - -void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row - const uint64x1_t A1 = vshr_n_u64(A0, 8); - const uint64x1_t A2 = vshr_n_u64(A0, 16); - const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0); - const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1); - const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2); - const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00); - const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0); - const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); - const uint32x2_t r0 = vreinterpret_u32_u8(avg2); - const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); - const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); - const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); - (void)left; - vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); - vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); - vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); - vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); - dst[3 * stride + 3] = above[7]; -} - -void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 }; - static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 }; - const uint8x8_t sh_12345677 = vld1_u8(shuffle1); - const uint8x8_t sh_23456777 = vld1_u8(shuffle2); - const uint8x8_t A0 = vld1_u8(above); // top row - const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677); - const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777); - const uint8x8_t avg1 = vhadd_u8(A0, A2); - uint8x8_t row = vrhadd_u8(avg1, A1); - int i; - (void)left; - for (i = 0; i < 7; ++i) { - vst1_u8(dst + i * stride, row); - row = vtbl1_u8(row, sh_12345677); - } - vst1_u8(dst + i * stride, row); -} - -void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const uint8x16_t A0 = vld1q_u8(above); // top row - const uint8x16_t above_right = vld1q_dup_u8(above + 15); - const uint8x16_t A1 = vextq_u8(A0, above_right, 1); - const uint8x16_t A2 = vextq_u8(A0, above_right, 2); - const uint8x16_t avg1 = vhaddq_u8(A0, A2); - uint8x16_t row = vrhaddq_u8(avg1, A1); - int i; - (void)left; - for (i = 0; i < 15; ++i) { - vst1q_u8(dst + i * stride, row); - row = vextq_u8(row, above_right, 1); - } - vst1q_u8(dst + i * stride, row); -} - -// ----------------------------------------------------------------------------- - -void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const uint8x8_t XABCD_u8 = vld1_u8(above - 1); - const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8); - const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32); - const uint32x2_t zero = vdup_n_u32(0); - const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0); - const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL); - const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8)); - const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC); - const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8)); - const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16)); - const uint8_t D = vget_lane_u8(XABCD_u8, 4); - const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6); - const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC); - const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8); - const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_); - const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); - const uint32x2_t r3 = vreinterpret_u32_u8(avg2); - const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); - const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); - const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); - vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); - vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); - vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); - vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); -} - -#if !HAVE_NEON_ASM - -void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint32x2_t d0u32 = vdup_n_u32(0); - (void)left; - - d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); - for (i = 0; i < 4; i++, dst += stride) - vst1_lane_u32((uint32_t *)dst, d0u32, 0); -} - -void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint8x8_t d0u8 = vdup_n_u8(0); - (void)left; - - d0u8 = vld1_u8(above); - for (i = 0; i < 8; i++, dst += stride) - vst1_u8(dst, d0u8); -} - -void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - for (i = 0; i < 16; i++, dst += stride) - vst1q_u8(dst, q0u8); -} - -void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - q1u8 = vld1q_u8(above + 16); - for (i = 0; i < 32; i++, dst += stride) { - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - } -} - -void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d1u32 = vdup_n_u32(0); - (void)above; - - d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); - - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); -} - -void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint64x1_t d1u64 = vdup_n_u64(0); - (void)above; - - d1u64 = vld1_u64((const uint64_t *)left); - - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); - vst1_u8(dst, d0u8); -} - -void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; - - q1u8 = vld1q_u8(left); - d2u8 = vget_low_u8(q1u8); - for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - dst += stride; - } -} - -void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j, k; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; - - for (k = 0; k < 2; k++, left += 16) { - q1u8 = vld1q_u8(left); - d2u8 = vget_low_u8(q1u8); - for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - } - } -} - -void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint16x8_t q1u16, q3u16; - int16x8_t q1s16; - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d2u32 = vdup_n_u32(0); - - d0u8 = vld1_dup_u8(above - 1); - d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); - q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); - for (i = 0; i < 4; i++, dst += stride) { - q1u16 = vdupq_n_u16((uint16_t)left[i]); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), - vreinterpretq_s16_u16(q3u16)); - d0u8 = vqmovun_s16(q1s16); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - } -} - -void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j; - uint16x8_t q0u16, q3u16, q10u16; - int16x8_t q0s16; - uint16x4_t d20u16; - uint8x8_t d0u8, d2u8, d30u8; - - d0u8 = vld1_dup_u8(above - 1); - d30u8 = vld1_u8(left); - d2u8 = vld1_u8(above); - q10u16 = vmovl_u8(d30u8); - q3u16 = vsubl_u8(d2u8, d0u8); - d20u16 = vget_low_u16(q10u16); - for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 1); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 2); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 3); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - } -} - -void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; - uint8x16_t q0u8, q1u8; - int16x8_t q0s16, q1s16, q8s16, q11s16; - uint16x4_t d20u16; - uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; - - q0u8 = vld1q_dup_u8(above - 1); - q1u8 = vld1q_u8(above); - q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - for (k = 0; k < 2; k++, left += 8) { - d18u8 = vld1_u8(left); - q10u16 = vmovl_u8(d18u8); - d20u16 = vget_low_u16(q10u16); - for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q8u16 = vdupq_lane_u16(d20u16, 1); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d20u16, 2); - q8u16 = vdupq_lane_u16(d20u16, 3); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += stride; - } - } -} - -void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; - uint8x16_t q0u8, q1u8, q2u8; - int16x8_t q12s16, q13s16, q14s16, q15s16; - uint16x4_t d6u16; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; - - q0u8 = vld1q_dup_u8(above - 1); - q1u8 = vld1q_u8(above); - q2u8 = vld1q_u8(above + 16); - q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); - q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); - for (k = 0; k < 4; k++, left += 8) { - d26u8 = vld1_u8(left); - q3u16 = vmovl_u8(d26u8); - d6u16 = vget_low_u16(q3u16); - for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { - q0u16 = vdupq_lane_u16(d6u16, 0); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d6u16, 1); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d6u16, 2); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d6u16, 3); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - } - } -} -#endif // !HAVE_NEON_ASM diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c deleted file mode 100644 index d24e6adc8a..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -static INLINE void loop_filter_neon_16( - uint8x16_t qblimit, // blimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p3 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r) { // q1 - uint8x16_t q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q2s16, q11s16; - uint16x8_t q4u16; - int8x16_t q0s8, q1s8, q2s8, q11s8, q12s8, q13s8; - int8x8_t d2s8, d3s8; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q3 = vabdq_u8(q9, q8); - q4 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q3 = vmaxq_u8(q3, q4); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q9 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q3); - - q2u8 = vabdq_u8(q5, q8); - q9 = vqaddq_u8(q9, q9); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - // vp8_filter() function - // convert to signed - q10 = vdupq_n_u8(0x80); - q8 = veorq_u8(q8, q10); - q7 = veorq_u8(q7, q10); - q6 = veorq_u8(q6, q10); - q5 = veorq_u8(q5, q10); - - q2u8 = vshrq_n_u8(q2u8, 1); - q9 = vqaddq_u8(q9, q2u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q9 = vcgeq_u8(qblimit, q9); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q14u8 = vorrq_u8(q13u8, q14u8); - - q4u16 = vdupq_n_u16(3); - q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); - q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); - - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); - q15u8 = vandq_u8(q15u8, q9); - - q1s8 = vreinterpretq_s8_u8(q1u8); - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); - - q4 = vdupq_n_u8(3); - q9 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2s8 = vqmovn_s16(q2s16); - d3s8 = vqmovn_s16(q11s16); - q1s8 = vcombine_s8(d2s8, d3s8); - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); - q1s8 = vreinterpretq_s8_u8(q1u8); - - q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q4)); - q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); - q2s8 = vshrq_n_s8(q2s8, 3); - q1s8 = vshrq_n_s8(q1s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); - q0s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); - - q1s8 = vrshrq_n_s8(q1s8, 1); - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); - q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); - - *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q10); - *q7r = veorq_u8(vreinterpretq_u8_s8(q0s8), q10); - *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q10); - *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q10); - return; -} - -void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - uint8x8_t dblimit0, dlimit0, dthresh0, dblimit1, dlimit1, dthresh1; - uint8x16_t qblimit, qlimit, qthresh; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; - - dblimit0 = vld1_u8(blimit0); - dlimit0 = vld1_u8(limit0); - dthresh0 = vld1_u8(thresh0); - dblimit1 = vld1_u8(blimit1); - dlimit1 = vld1_u8(limit1); - dthresh1 = vld1_u8(thresh1); - qblimit = vcombine_u8(dblimit0, dblimit1); - qlimit = vcombine_u8(dlimit0, dlimit1); - qthresh = vcombine_u8(dthresh0, dthresh1); - - s -= (p << 2); - - q3u8 = vld1q_u8(s); - s += p; - q4u8 = vld1q_u8(s); - s += p; - q5u8 = vld1q_u8(s); - s += p; - q6u8 = vld1q_u8(s); - s += p; - q7u8 = vld1q_u8(s); - s += p; - q8u8 = vld1q_u8(s); - s += p; - q9u8 = vld1q_u8(s); - s += p; - q10u8 = vld1q_u8(s); - - loop_filter_neon_16(qblimit, qlimit, qthresh, - q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, - &q5u8, &q6u8, &q7u8, &q8u8); - - s -= (p * 5); - vst1q_u8(s, q5u8); - s += p; - vst1q_u8(s, q6u8); - s += p; - vst1q_u8(s, q7u8); - s += p; - vst1q_u8(s, q8u8); - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c deleted file mode 100644 index 7f3ee70b94..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" - -static INLINE void loop_filter_neon( - uint8x8_t dblimit, // flimit - uint8x8_t dlimit, // limit - uint8x8_t dthresh, // thresh - uint8x8_t d3u8, // p3 - uint8x8_t d4u8, // p2 - uint8x8_t d5u8, // p1 - uint8x8_t d6u8, // p0 - uint8x8_t d7u8, // q0 - uint8x8_t d16u8, // q1 - uint8x8_t d17u8, // q2 - uint8x8_t d18u8, // q3 - uint8x8_t *d4ru8, // p1 - uint8x8_t *d5ru8, // p0 - uint8x8_t *d6ru8, // q0 - uint8x8_t *d7ru8) { // q1 - uint8x8_t d19u8, d20u8, d21u8, d22u8, d23u8, d27u8, d28u8; - int16x8_t q12s16; - int8x8_t d19s8, d20s8, d21s8, d26s8, d27s8, d28s8; - - d19u8 = vabd_u8(d3u8, d4u8); - d20u8 = vabd_u8(d4u8, d5u8); - d21u8 = vabd_u8(d5u8, d6u8); - d22u8 = vabd_u8(d16u8, d7u8); - d3u8 = vabd_u8(d17u8, d16u8); - d4u8 = vabd_u8(d18u8, d17u8); - - d19u8 = vmax_u8(d19u8, d20u8); - d20u8 = vmax_u8(d21u8, d22u8); - d3u8 = vmax_u8(d3u8, d4u8); - d23u8 = vmax_u8(d19u8, d20u8); - - d17u8 = vabd_u8(d6u8, d7u8); - - d21u8 = vcgt_u8(d21u8, dthresh); - d22u8 = vcgt_u8(d22u8, dthresh); - d23u8 = vmax_u8(d23u8, d3u8); - - d28u8 = vabd_u8(d5u8, d16u8); - d17u8 = vqadd_u8(d17u8, d17u8); - - d23u8 = vcge_u8(dlimit, d23u8); - - d18u8 = vdup_n_u8(0x80); - d5u8 = veor_u8(d5u8, d18u8); - d6u8 = veor_u8(d6u8, d18u8); - d7u8 = veor_u8(d7u8, d18u8); - d16u8 = veor_u8(d16u8, d18u8); - - d28u8 = vshr_n_u8(d28u8, 1); - d17u8 = vqadd_u8(d17u8, d28u8); - - d19u8 = vdup_n_u8(3); - - d28s8 = vsub_s8(vreinterpret_s8_u8(d7u8), - vreinterpret_s8_u8(d6u8)); - - d17u8 = vcge_u8(dblimit, d17u8); - - d27s8 = vqsub_s8(vreinterpret_s8_u8(d5u8), - vreinterpret_s8_u8(d16u8)); - - d22u8 = vorr_u8(d21u8, d22u8); - - q12s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d19u8)); - - d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d22u8); - d23u8 = vand_u8(d23u8, d17u8); - - q12s16 = vaddw_s8(q12s16, vreinterpret_s8_u8(d27u8)); - - d17u8 = vdup_n_u8(4); - - d27s8 = vqmovn_s16(q12s16); - d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d23u8); - d27s8 = vreinterpret_s8_u8(d27u8); - - d28s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d19u8)); - d27s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d17u8)); - d28s8 = vshr_n_s8(d28s8, 3); - d27s8 = vshr_n_s8(d27s8, 3); - - d19s8 = vqadd_s8(vreinterpret_s8_u8(d6u8), d28s8); - d26s8 = vqsub_s8(vreinterpret_s8_u8(d7u8), d27s8); - - d27s8 = vrshr_n_s8(d27s8, 1); - d27s8 = vbic_s8(d27s8, vreinterpret_s8_u8(d22u8)); - - d21s8 = vqadd_s8(vreinterpret_s8_u8(d5u8), d27s8); - d20s8 = vqsub_s8(vreinterpret_s8_u8(d16u8), d27s8); - - *d4ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d18u8); - *d5ru8 = veor_u8(vreinterpret_u8_s8(d19s8), d18u8); - *d6ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d18u8); - *d7ru8 = veor_u8(vreinterpret_u8_s8(d20s8), d18u8); - return; -} - -void vpx_lpf_horizontal_4_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i; - uint8_t *s, *psrc; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - psrc = src - (pitch << 2); - for (i = 0; i < 1; i++) { - s = psrc + i * 8; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - loop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d4u8, &d5u8, &d6u8, &d7u8); - - s -= (pitch * 5); - vst1_u8(s, d4u8); - s += pitch; - vst1_u8(s, d5u8); - s += pitch; - vst1_u8(s, d6u8); - s += pitch; - vst1_u8(s, d7u8); - } - return; -} - -void vpx_lpf_vertical_4_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i, pitch8; - uint8_t *s; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; - uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; - uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; - uint8x8x4_t d4Result; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - pitch8 = pitch * 8; - for (i = 0; i < 1; i++, src += pitch8) { - s = src - (i + 1) * 4; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), - vreinterpret_u32_u8(d7u8)); - d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), - vreinterpret_u32_u8(d16u8)); - d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), - vreinterpret_u32_u8(d17u8)); - d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), - vreinterpret_u32_u8(d18u8)); - - d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), - vreinterpret_u16_u32(d2tmp2.val[0])); - d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), - vreinterpret_u16_u32(d2tmp3.val[0])); - d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), - vreinterpret_u16_u32(d2tmp2.val[1])); - d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), - vreinterpret_u16_u32(d2tmp3.val[1])); - - d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), - vreinterpret_u8_u16(d2tmp5.val[0])); - d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), - vreinterpret_u8_u16(d2tmp5.val[1])); - d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), - vreinterpret_u8_u16(d2tmp7.val[0])); - d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), - vreinterpret_u8_u16(d2tmp7.val[1])); - - d3u8 = d2tmp8.val[0]; - d4u8 = d2tmp8.val[1]; - d5u8 = d2tmp9.val[0]; - d6u8 = d2tmp9.val[1]; - d7u8 = d2tmp10.val[0]; - d16u8 = d2tmp10.val[1]; - d17u8 = d2tmp11.val[0]; - d18u8 = d2tmp11.val[1]; - - loop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d4u8, &d5u8, &d6u8, &d7u8); - - d4Result.val[0] = d4u8; - d4Result.val[1] = d5u8; - d4Result.val[2] = d6u8; - d4Result.val[3] = d7u8; - - src -= 2; - vst4_lane_u8(src, d4Result, 0); - src += pitch; - vst4_lane_u8(src, d4Result, 1); - src += pitch; - vst4_lane_u8(src, d4Result, 2); - src += pitch; - vst4_lane_u8(src, d4Result, 3); - src += pitch; - vst4_lane_u8(src, d4Result, 4); - src += pitch; - vst4_lane_u8(src, d4Result, 5); - src += pitch; - vst4_lane_u8(src, d4Result, 6); - src += pitch; - vst4_lane_u8(src, d4Result, 7); - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c deleted file mode 100644 index ec3757380d..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" - -static INLINE void mbloop_filter_neon( - uint8x8_t dblimit, // mblimit - uint8x8_t dlimit, // limit - uint8x8_t dthresh, // thresh - uint8x8_t d3u8, // p2 - uint8x8_t d4u8, // p2 - uint8x8_t d5u8, // p1 - uint8x8_t d6u8, // p0 - uint8x8_t d7u8, // q0 - uint8x8_t d16u8, // q1 - uint8x8_t d17u8, // q2 - uint8x8_t d18u8, // q3 - uint8x8_t *d0ru8, // p1 - uint8x8_t *d1ru8, // p1 - uint8x8_t *d2ru8, // p0 - uint8x8_t *d3ru8, // q0 - uint8x8_t *d4ru8, // q1 - uint8x8_t *d5ru8) { // q1 - uint32_t flat; - uint8x8_t d0u8, d1u8, d2u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8; - uint8x8_t d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int16x8_t q15s16; - uint16x8_t q10u16, q14u16; - int8x8_t d21s8, d24s8, d25s8, d26s8, d28s8, d29s8, d30s8; - - d19u8 = vabd_u8(d3u8, d4u8); - d20u8 = vabd_u8(d4u8, d5u8); - d21u8 = vabd_u8(d5u8, d6u8); - d22u8 = vabd_u8(d16u8, d7u8); - d23u8 = vabd_u8(d17u8, d16u8); - d24u8 = vabd_u8(d18u8, d17u8); - - d19u8 = vmax_u8(d19u8, d20u8); - d20u8 = vmax_u8(d21u8, d22u8); - - d25u8 = vabd_u8(d6u8, d4u8); - - d23u8 = vmax_u8(d23u8, d24u8); - - d26u8 = vabd_u8(d7u8, d17u8); - - d19u8 = vmax_u8(d19u8, d20u8); - - d24u8 = vabd_u8(d6u8, d7u8); - d27u8 = vabd_u8(d3u8, d6u8); - d28u8 = vabd_u8(d18u8, d7u8); - - d19u8 = vmax_u8(d19u8, d23u8); - - d23u8 = vabd_u8(d5u8, d16u8); - d24u8 = vqadd_u8(d24u8, d24u8); - - - d19u8 = vcge_u8(dlimit, d19u8); - - - d25u8 = vmax_u8(d25u8, d26u8); - d26u8 = vmax_u8(d27u8, d28u8); - - d23u8 = vshr_n_u8(d23u8, 1); - - d25u8 = vmax_u8(d25u8, d26u8); - - d24u8 = vqadd_u8(d24u8, d23u8); - - d20u8 = vmax_u8(d20u8, d25u8); - - d23u8 = vdup_n_u8(1); - d24u8 = vcge_u8(dblimit, d24u8); - - d21u8 = vcgt_u8(d21u8, dthresh); - - d20u8 = vcge_u8(d23u8, d20u8); - - d19u8 = vand_u8(d19u8, d24u8); - - d23u8 = vcgt_u8(d22u8, dthresh); - - d20u8 = vand_u8(d20u8, d19u8); - - d22u8 = vdup_n_u8(0x80); - - d23u8 = vorr_u8(d21u8, d23u8); - - q10u16 = vcombine_u16(vreinterpret_u16_u8(d20u8), - vreinterpret_u16_u8(d21u8)); - - d30u8 = vshrn_n_u16(q10u16, 4); - flat = vget_lane_u32(vreinterpret_u32_u8(d30u8), 0); - - if (flat == 0xffffffff) { // Check for all 1's, power_branch_only - d27u8 = vdup_n_u8(3); - d21u8 = vdup_n_u8(2); - q14u16 = vaddl_u8(d6u8, d7u8); - q14u16 = vmlal_u8(q14u16, d3u8, d27u8); - q14u16 = vmlal_u8(q14u16, d4u8, d21u8); - q14u16 = vaddw_u8(q14u16, d5u8); - *d0ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vaddw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d16u8); - *d1ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d17u8); - *d2ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d7u8); - q14u16 = vaddw_u8(q14u16, d18u8); - *d3ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vsubw_u8(q14u16, d7u8); - q14u16 = vaddw_u8(q14u16, d16u8); - q14u16 = vaddw_u8(q14u16, d18u8); - *d4ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vsubw_u8(q14u16, d16u8); - q14u16 = vaddw_u8(q14u16, d17u8); - q14u16 = vaddw_u8(q14u16, d18u8); - *d5ru8 = vqrshrn_n_u16(q14u16, 3); - } else { - d21u8 = veor_u8(d7u8, d22u8); - d24u8 = veor_u8(d6u8, d22u8); - d25u8 = veor_u8(d5u8, d22u8); - d26u8 = veor_u8(d16u8, d22u8); - - d27u8 = vdup_n_u8(3); - - d28s8 = vsub_s8(vreinterpret_s8_u8(d21u8), vreinterpret_s8_u8(d24u8)); - d29s8 = vqsub_s8(vreinterpret_s8_u8(d25u8), vreinterpret_s8_u8(d26u8)); - - q15s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d27u8)); - - d29s8 = vand_s8(d29s8, vreinterpret_s8_u8(d23u8)); - - q15s16 = vaddw_s8(q15s16, d29s8); - - d29u8 = vdup_n_u8(4); - - d28s8 = vqmovn_s16(q15s16); - - d28s8 = vand_s8(d28s8, vreinterpret_s8_u8(d19u8)); - - d30s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d27u8)); - d29s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d29u8)); - d30s8 = vshr_n_s8(d30s8, 3); - d29s8 = vshr_n_s8(d29s8, 3); - - d24s8 = vqadd_s8(vreinterpret_s8_u8(d24u8), d30s8); - d21s8 = vqsub_s8(vreinterpret_s8_u8(d21u8), d29s8); - - d29s8 = vrshr_n_s8(d29s8, 1); - d29s8 = vbic_s8(d29s8, vreinterpret_s8_u8(d23u8)); - - d25s8 = vqadd_s8(vreinterpret_s8_u8(d25u8), d29s8); - d26s8 = vqsub_s8(vreinterpret_s8_u8(d26u8), d29s8); - - if (flat == 0) { // filter_branch_only - *d0ru8 = d4u8; - *d1ru8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8); - *d2ru8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8); - *d3ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8); - *d4ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8); - *d5ru8 = d17u8; - return; - } - - d21u8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8); - d24u8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8); - d25u8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8); - d26u8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8); - - d23u8 = vdup_n_u8(2); - q14u16 = vaddl_u8(d6u8, d7u8); - q14u16 = vmlal_u8(q14u16, d3u8, d27u8); - q14u16 = vmlal_u8(q14u16, d4u8, d23u8); - - d0u8 = vbsl_u8(d20u8, dblimit, d4u8); - - q14u16 = vaddw_u8(q14u16, d5u8); - - d1u8 = vbsl_u8(d20u8, dlimit, d25u8); - - d30u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vaddw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d16u8); - - d2u8 = vbsl_u8(d20u8, dthresh, d24u8); - - d31u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d17u8); - - *d0ru8 = vbsl_u8(d20u8, d30u8, d0u8); - - d23u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d7u8); - - *d1ru8 = vbsl_u8(d20u8, d31u8, d1u8); - - q14u16 = vaddw_u8(q14u16, d18u8); - - *d2ru8 = vbsl_u8(d20u8, d23u8, d2u8); - - d22u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vsubw_u8(q14u16, d7u8); - q14u16 = vaddw_u8(q14u16, d16u8); - - d3u8 = vbsl_u8(d20u8, d3u8, d21u8); - - q14u16 = vaddw_u8(q14u16, d18u8); - - d4u8 = vbsl_u8(d20u8, d4u8, d26u8); - - d6u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vsubw_u8(q14u16, d16u8); - q14u16 = vaddw_u8(q14u16, d17u8); - q14u16 = vaddw_u8(q14u16, d18u8); - - d5u8 = vbsl_u8(d20u8, d5u8, d17u8); - - d7u8 = vqrshrn_n_u16(q14u16, 3); - - *d3ru8 = vbsl_u8(d20u8, d22u8, d3u8); - *d4ru8 = vbsl_u8(d20u8, d6u8, d4u8); - *d5ru8 = vbsl_u8(d20u8, d7u8, d5u8); - } - return; -} - -void vpx_lpf_horizontal_8_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i; - uint8_t *s, *psrc; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - uint8x8_t d16u8, d17u8, d18u8; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - psrc = src - (pitch << 2); - for (i = 0; i < 1; i++) { - s = psrc + i * 8; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - mbloop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); - - s -= (pitch * 6); - vst1_u8(s, d0u8); - s += pitch; - vst1_u8(s, d1u8); - s += pitch; - vst1_u8(s, d2u8); - s += pitch; - vst1_u8(s, d3u8); - s += pitch; - vst1_u8(s, d4u8); - s += pitch; - vst1_u8(s, d5u8); - } - return; -} - -void vpx_lpf_vertical_8_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i; - uint8_t *s; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - uint8x8_t d16u8, d17u8, d18u8; - uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; - uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; - uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; - uint8x8x4_t d4Result; - uint8x8x2_t d2Result; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - for (i = 0; i < 1; i++) { - s = src + (i * (pitch << 3)) - 4; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), - vreinterpret_u32_u8(d7u8)); - d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), - vreinterpret_u32_u8(d16u8)); - d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), - vreinterpret_u32_u8(d17u8)); - d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), - vreinterpret_u32_u8(d18u8)); - - d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), - vreinterpret_u16_u32(d2tmp2.val[0])); - d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), - vreinterpret_u16_u32(d2tmp3.val[0])); - d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), - vreinterpret_u16_u32(d2tmp2.val[1])); - d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), - vreinterpret_u16_u32(d2tmp3.val[1])); - - d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), - vreinterpret_u8_u16(d2tmp5.val[0])); - d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), - vreinterpret_u8_u16(d2tmp5.val[1])); - d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), - vreinterpret_u8_u16(d2tmp7.val[0])); - d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), - vreinterpret_u8_u16(d2tmp7.val[1])); - - d3u8 = d2tmp8.val[0]; - d4u8 = d2tmp8.val[1]; - d5u8 = d2tmp9.val[0]; - d6u8 = d2tmp9.val[1]; - d7u8 = d2tmp10.val[0]; - d16u8 = d2tmp10.val[1]; - d17u8 = d2tmp11.val[0]; - d18u8 = d2tmp11.val[1]; - - mbloop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); - - d4Result.val[0] = d0u8; - d4Result.val[1] = d1u8; - d4Result.val[2] = d2u8; - d4Result.val[3] = d3u8; - - d2Result.val[0] = d4u8; - d2Result.val[1] = d5u8; - - s = src - 3; - vst4_lane_u8(s, d4Result, 0); - s += pitch; - vst4_lane_u8(s, d4Result, 1); - s += pitch; - vst4_lane_u8(s, d4Result, 2); - s += pitch; - vst4_lane_u8(s, d4Result, 3); - s += pitch; - vst4_lane_u8(s, d4Result, 4); - s += pitch; - vst4_lane_u8(s, d4Result, 5); - s += pitch; - vst4_lane_u8(s, d4Result, 6); - s += pitch; - vst4_lane_u8(s, d4Result, 7); - - s = src + 1; - vst2_lane_u8(s, d2Result, 0); - s += pitch; - vst2_lane_u8(s, d2Result, 1); - s += pitch; - vst2_lane_u8(s, d2Result, 2); - s += pitch; - vst2_lane_u8(s, d2Result, 3); - s += pitch; - vst2_lane_u8(s, d2Result, 4); - s += pitch; - vst2_lane_u8(s, d2Result, 5); - s += pitch; - vst2_lane_u8(s, d2Result, 6); - s += pitch; - vst2_lane_u8(s, d2Result, 7); - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c deleted file mode 100644 index aa31f29358..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0); - vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1); -} - -#if HAVE_NEON_ASM -void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0); - vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); - vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); -} -#endif // HAVE_NEON_ASM diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c deleted file mode 100644 index 8632250138..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -static INLINE int32x4_t MULTIPLY_BY_Q0( - int16x4_t dsrc0, - int16x4_t dsrc1, - int16x4_t dsrc2, - int16x4_t dsrc3, - int16x4_t dsrc4, - int16x4_t dsrc5, - int16x4_t dsrc6, - int16x4_t dsrc7, - int16x8_t q0s16) { - int32x4_t qdst; - int16x4_t d0s16, d1s16; - - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - - qdst = vmull_lane_s16(dsrc0, d0s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); - qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); - return qdst; -} - -void vpx_convolve8_avg_horiz_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, - int x_step_q4, - const int16_t *filter_y, // unused - int y_step_q4, // unused - int w, - int h) { - int width; - const uint8_t *s; - uint8_t *d; - uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; - uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32; - uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - uint16x8x2_t q0x2u16; - uint8x8x2_t d0x2u8, d1x2u8; - uint32x2x2_t d0x2u32; - uint16x4x2_t d0x2u16, d1x2u16; - uint32x4x2_t q0x2u32; - - assert(x_step_q4 == 16); - - q0s16 = vld1q_s16(filter_x); - - src -= 3; // adjust for taps - for (; h > 0; h -= 4) { // loop_horiz_v - s = src; - d24u8 = vld1_u8(s); - s += src_stride; - d25u8 = vld1_u8(s); - s += src_stride; - d26u8 = vld1_u8(s); - s += src_stride; - d27u8 = vld1_u8(s); - - q12u8 = vcombine_u8(d24u8, d25u8); - q13u8 = vcombine_u8(d26u8, d27u8); - - q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), - vreinterpretq_u16_u8(q13u8)); - d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); - d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); - d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); - d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); - d0x2u8 = vtrn_u8(d24u8, d25u8); - d1x2u8 = vtrn_u8(d26u8, d27u8); - - __builtin_prefetch(src + src_stride * 4); - __builtin_prefetch(src + src_stride * 5); - - q8u16 = vmovl_u8(d0x2u8.val[0]); - q9u16 = vmovl_u8(d0x2u8.val[1]); - q10u16 = vmovl_u8(d1x2u8.val[0]); - q11u16 = vmovl_u8(d1x2u8.val[1]); - - src += 7; - d16u16 = vget_low_u16(q8u16); - d17u16 = vget_high_u16(q8u16); - d18u16 = vget_low_u16(q9u16); - d19u16 = vget_high_u16(q9u16); - q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 - q9u16 = vcombine_u16(d17u16, d19u16); - - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 - for (width = w; - width > 0; - width -= 4, src += 4, dst += 4) { // loop_horiz - s = src; - d28u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d29u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d31u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d30u32 = vld1_dup_u32((const uint32_t *)s); - - __builtin_prefetch(src + 64); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), - vreinterpret_u16_u32(d31u32)); - d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), - vreinterpret_u16_u32(d30u32)); - d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 - vreinterpret_u8_u16(d1x2u16.val[0])); // d29 - d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 - vreinterpret_u8_u16(d1x2u16.val[1])); // d30 - - __builtin_prefetch(src + 64 + src_stride); - - q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); - q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); - q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), - vreinterpretq_u32_u8(q15u8)); - - d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); - d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); - q12u16 = vmovl_u8(d28u8); - q13u16 = vmovl_u8(d29u8); - - __builtin_prefetch(src + 64 + src_stride * 2); - - d = dst; - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0); - d += dst_stride; - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1); - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, - d18s16, d19s16, d23s16, d24s16, q0s16); - q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, - d19s16, d23s16, d24s16, d26s16, q0s16); - q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, - d23s16, d24s16, d26s16, d27s16, q0s16); - q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - __builtin_prefetch(src + 64 + src_stride * 3); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u8 = vqmovn_u16(q1u16); - d3u8 = vqmovn_u16(q2u16); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), - vreinterpret_u16_u8(d3u8)); - d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), - vreinterpret_u32_u16(d0x2u16.val[1])); - d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), - vreinterpret_u8_u32(d0x2u32.val[1])); - - q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); - q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32)); - - q1u8 = vrhaddq_u8(q1u8, q3u8); - - d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8)); - d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8)); - - d = dst; - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - - q8u16 = q9u16; - d20s16 = d23s16; - q11u16 = q12u16; - q9u16 = q13u16; - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - } - src += src_stride * 4 - w - 7; - dst += dst_stride * 4 - w; - } - return; -} - -void vpx_convolve8_avg_vert_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, // unused - int x_step_q4, // unused - const int16_t *filter_y, - int y_step_q4, - int w, - int h) { - int height; - const uint8_t *s; - uint8_t *d; - uint8x8_t d2u8, d3u8; - uint32x2_t d2u32, d3u32, d6u32, d7u32; - uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; - uint8x16_t q1u8, q3u8; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - - assert(y_step_q4 == 16); - - src -= src_stride * 3; - q0s16 = vld1q_s16(filter_y); - for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h - s = src; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); - s += src_stride; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); - s += src_stride; - d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); - s += src_stride; - - q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); - q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); - q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); - q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); - - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d = dst; - for (height = h; height > 0; height -= 4) { // loop_vert - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); - s += src_stride; - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); - s += src_stride; - - q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); - q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); - - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0); - d += dst_stride; - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1); - d -= dst_stride * 3; - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - __builtin_prefetch(s); - __builtin_prefetch(s + src_stride); - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, - d20s16, d21s16, d22s16, d24s16, q0s16); - __builtin_prefetch(s + src_stride * 2); - __builtin_prefetch(s + src_stride * 3); - q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, - d21s16, d22s16, d24s16, d26s16, q0s16); - __builtin_prefetch(d); - __builtin_prefetch(d + dst_stride); - q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, - d22s16, d24s16, d26s16, d27s16, q0s16); - __builtin_prefetch(d + dst_stride * 2); - __builtin_prefetch(d + dst_stride * 3); - q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u8 = vqmovn_u16(q1u16); - d3u8 = vqmovn_u16(q2u16); - - q1u8 = vcombine_u8(d2u8, d3u8); - q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32)); - - q1u8 = vrhaddq_u8(q1u8, q3u8); - - d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8)); - d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8)); - - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - d += dst_stride; - - q8u16 = q10u16; - d18s16 = d22s16; - d19s16 = d24s16; - q10u16 = q13u16; - d22s16 = d25s16; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c deleted file mode 100644 index 9bd715e2c6..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -static INLINE int32x4_t MULTIPLY_BY_Q0( - int16x4_t dsrc0, - int16x4_t dsrc1, - int16x4_t dsrc2, - int16x4_t dsrc3, - int16x4_t dsrc4, - int16x4_t dsrc5, - int16x4_t dsrc6, - int16x4_t dsrc7, - int16x8_t q0s16) { - int32x4_t qdst; - int16x4_t d0s16, d1s16; - - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - - qdst = vmull_lane_s16(dsrc0, d0s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); - qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); - return qdst; -} - -void vpx_convolve8_horiz_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, - int x_step_q4, - const int16_t *filter_y, // unused - int y_step_q4, // unused - int w, - int h) { - int width; - const uint8_t *s, *psrc; - uint8_t *d, *pdst; - uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; - uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32; - uint8x16_t q12u8, q13u8, q14u8, q15u8; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - uint16x8x2_t q0x2u16; - uint8x8x2_t d0x2u8, d1x2u8; - uint32x2x2_t d0x2u32; - uint16x4x2_t d0x2u16, d1x2u16; - uint32x4x2_t q0x2u32; - - assert(x_step_q4 == 16); - - q0s16 = vld1q_s16(filter_x); - - src -= 3; // adjust for taps - for (; h > 0; h -= 4, - src += src_stride * 4, - dst += dst_stride * 4) { // loop_horiz_v - s = src; - d24u8 = vld1_u8(s); - s += src_stride; - d25u8 = vld1_u8(s); - s += src_stride; - d26u8 = vld1_u8(s); - s += src_stride; - d27u8 = vld1_u8(s); - - q12u8 = vcombine_u8(d24u8, d25u8); - q13u8 = vcombine_u8(d26u8, d27u8); - - q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), - vreinterpretq_u16_u8(q13u8)); - d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); - d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); - d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); - d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); - d0x2u8 = vtrn_u8(d24u8, d25u8); - d1x2u8 = vtrn_u8(d26u8, d27u8); - - __builtin_prefetch(src + src_stride * 4); - __builtin_prefetch(src + src_stride * 5); - __builtin_prefetch(src + src_stride * 6); - - q8u16 = vmovl_u8(d0x2u8.val[0]); - q9u16 = vmovl_u8(d0x2u8.val[1]); - q10u16 = vmovl_u8(d1x2u8.val[0]); - q11u16 = vmovl_u8(d1x2u8.val[1]); - - d16u16 = vget_low_u16(q8u16); - d17u16 = vget_high_u16(q8u16); - d18u16 = vget_low_u16(q9u16); - d19u16 = vget_high_u16(q9u16); - q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 - q9u16 = vcombine_u16(d17u16, d19u16); - - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 - for (width = w, psrc = src + 7, pdst = dst; - width > 0; - width -= 4, psrc += 4, pdst += 4) { // loop_horiz - s = psrc; - d28u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d29u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d31u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d30u32 = vld1_dup_u32((const uint32_t *)s); - - __builtin_prefetch(psrc + 64); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), - vreinterpret_u16_u32(d31u32)); - d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), - vreinterpret_u16_u32(d30u32)); - d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 - vreinterpret_u8_u16(d1x2u16.val[0])); // d29 - d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 - vreinterpret_u8_u16(d1x2u16.val[1])); // d30 - - __builtin_prefetch(psrc + 64 + src_stride); - - q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); - q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); - q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), - vreinterpretq_u32_u8(q15u8)); - - d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); - d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); - q12u16 = vmovl_u8(d28u8); - q13u16 = vmovl_u8(d29u8); - - __builtin_prefetch(psrc + 64 + src_stride * 2); - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, - d18s16, d19s16, d23s16, d24s16, q0s16); - q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, - d19s16, d23s16, d24s16, d26s16, q0s16); - q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, - d23s16, d24s16, d26s16, d27s16, q0s16); - q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - __builtin_prefetch(psrc + 60 + src_stride * 3); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u8 = vqmovn_u16(q1u16); - d3u8 = vqmovn_u16(q2u16); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), - vreinterpret_u16_u8(d3u8)); - d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), - vreinterpret_u32_u16(d0x2u16.val[1])); - d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), - vreinterpret_u8_u32(d0x2u32.val[1])); - - d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]); - d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]); - - d = pdst; - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - - q8u16 = q9u16; - d20s16 = d23s16; - q11u16 = q12u16; - q9u16 = q13u16; - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - } - } - return; -} - -void vpx_convolve8_vert_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, // unused - int x_step_q4, // unused - const int16_t *filter_y, - int y_step_q4, - int w, - int h) { - int height; - const uint8_t *s; - uint8_t *d; - uint32x2_t d2u32, d3u32; - uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - - assert(y_step_q4 == 16); - - src -= src_stride * 3; - q0s16 = vld1q_s16(filter_y); - for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h - s = src; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); - s += src_stride; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); - s += src_stride; - d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); - s += src_stride; - - q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); - q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); - q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); - q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); - - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d = dst; - for (height = h; height > 0; height -= 4) { // loop_vert - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); - s += src_stride; - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); - s += src_stride; - - q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); - q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - __builtin_prefetch(d); - __builtin_prefetch(d + dst_stride); - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, - d20s16, d21s16, d22s16, d24s16, q0s16); - __builtin_prefetch(d + dst_stride * 2); - __builtin_prefetch(d + dst_stride * 3); - q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, - d21s16, d22s16, d24s16, d26s16, q0s16); - __builtin_prefetch(s); - __builtin_prefetch(s + src_stride); - q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, - d22s16, d24s16, d26s16, d27s16, q0s16); - __builtin_prefetch(s + src_stride * 2); - __builtin_prefetch(s + src_stride * 3); - q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16)); - d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16)); - - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - d += dst_stride; - - q8u16 = q10u16; - d18s16 = d22s16; - d19s16 = d24s16; - q10u16 = q13u16; - d22s16 = d25s16; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c deleted file mode 100644 index dc58a332f8..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" - -void vpx_convolve_avg_neon( - const uint8_t *src, // r0 - ptrdiff_t src_stride, // r1 - uint8_t *dst, // r2 - ptrdiff_t dst_stride, // r3 - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, - int h) { - uint8_t *d; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint32x2_t d0u32, d2u32; - uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8; - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - d = dst; - if (w > 32) { // avg64 - for (; h > 0; h -= 1) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - q2u8 = vld1q_u8(src + 32); - q3u8 = vld1q_u8(src + 48); - src += src_stride; - q8u8 = vld1q_u8(d); - q9u8 = vld1q_u8(d + 16); - q10u8 = vld1q_u8(d + 32); - q11u8 = vld1q_u8(d + 48); - d += dst_stride; - - q0u8 = vrhaddq_u8(q0u8, q8u8); - q1u8 = vrhaddq_u8(q1u8, q9u8); - q2u8 = vrhaddq_u8(q2u8, q10u8); - q3u8 = vrhaddq_u8(q3u8, q11u8); - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - vst1q_u8(dst + 32, q2u8); - vst1q_u8(dst + 48, q3u8); - dst += dst_stride; - } - } else if (w == 32) { // avg32 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - src += src_stride; - q2u8 = vld1q_u8(src); - q3u8 = vld1q_u8(src + 16); - src += src_stride; - q8u8 = vld1q_u8(d); - q9u8 = vld1q_u8(d + 16); - d += dst_stride; - q10u8 = vld1q_u8(d); - q11u8 = vld1q_u8(d + 16); - d += dst_stride; - - q0u8 = vrhaddq_u8(q0u8, q8u8); - q1u8 = vrhaddq_u8(q1u8, q9u8); - q2u8 = vrhaddq_u8(q2u8, q10u8); - q3u8 = vrhaddq_u8(q3u8, q11u8); - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - dst += dst_stride; - vst1q_u8(dst, q2u8); - vst1q_u8(dst + 16, q3u8); - dst += dst_stride; - } - } else if (w > 8) { // avg16 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - src += src_stride; - q1u8 = vld1q_u8(src); - src += src_stride; - q2u8 = vld1q_u8(d); - d += dst_stride; - q3u8 = vld1q_u8(d); - d += dst_stride; - - q0u8 = vrhaddq_u8(q0u8, q2u8); - q1u8 = vrhaddq_u8(q1u8, q3u8); - - vst1q_u8(dst, q0u8); - dst += dst_stride; - vst1q_u8(dst, q1u8); - dst += dst_stride; - } - } else if (w == 8) { // avg8 - for (; h > 0; h -= 2) { - d0u8 = vld1_u8(src); - src += src_stride; - d1u8 = vld1_u8(src); - src += src_stride; - d2u8 = vld1_u8(d); - d += dst_stride; - d3u8 = vld1_u8(d); - d += dst_stride; - - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - q0u8 = vrhaddq_u8(q0u8, q1u8); - - vst1_u8(dst, vget_low_u8(q0u8)); - dst += dst_stride; - vst1_u8(dst, vget_high_u8(q0u8)); - dst += dst_stride; - } - } else { // avg4 - for (; h > 0; h -= 2) { - d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0); - src += src_stride; - d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1); - src += src_stride; - d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0); - d += dst_stride; - d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1); - d += dst_stride; - - d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32), - vreinterpret_u8_u32(d2u32)); - - d0u32 = vreinterpret_u32_u8(d0u8); - vst1_lane_u32((uint32_t *)dst, d0u32, 0); - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, d0u32, 1); - dst += dst_stride; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c deleted file mode 100644 index d8fb97a861..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" - -void vpx_convolve_copy_neon( - const uint8_t *src, // r0 - ptrdiff_t src_stride, // r1 - uint8_t *dst, // r2 - ptrdiff_t dst_stride, // r3 - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, - int h) { - uint8x8_t d0u8, d2u8; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - if (w > 32) { // copy64 - for (; h > 0; h--) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - q2u8 = vld1q_u8(src + 32); - q3u8 = vld1q_u8(src + 48); - src += src_stride; - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - vst1q_u8(dst + 32, q2u8); - vst1q_u8(dst + 48, q3u8); - dst += dst_stride; - } - } else if (w == 32) { // copy32 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - src += src_stride; - q2u8 = vld1q_u8(src); - q3u8 = vld1q_u8(src + 16); - src += src_stride; - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - dst += dst_stride; - vst1q_u8(dst, q2u8); - vst1q_u8(dst + 16, q3u8); - dst += dst_stride; - } - } else if (w > 8) { // copy16 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - src += src_stride; - q1u8 = vld1q_u8(src); - src += src_stride; - - vst1q_u8(dst, q0u8); - dst += dst_stride; - vst1q_u8(dst, q1u8); - dst += dst_stride; - } - } else if (w == 8) { // copy8 - for (; h > 0; h -= 2) { - d0u8 = vld1_u8(src); - src += src_stride; - d2u8 = vld1_u8(src); - src += src_stride; - - vst1_u8(dst, d0u8); - dst += dst_stride; - vst1_u8(dst, d2u8); - dst += dst_stride; - } - } else { // copy4 - for (; h > 0; h--) { - *(uint32_t *)dst = *(const uint32_t *)src; - src += src_stride; - dst += dst_stride; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c deleted file mode 100644 index 1506ce6203..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" - -void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the - * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4). - */ - DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); - - // Account for the vertical phase needing 3 lines prior and 4 lines post - int intermediate_height = h + 7; - - assert(y_step_q4 == 16); - assert(x_step_q4 == 16); - - /* Filter starting 3 lines back. The neon implementation will ignore the - * given height and filter a multiple of 4 lines. Since this goes in to - * the temp buffer which has lots of extra room and is subsequently discarded - * this is safe if somewhat less than ideal. - */ - vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, - temp, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, intermediate_height); - - /* Step into the temp buffer 3 lines to get the actual frame data */ - vpx_convolve8_vert_neon(temp + 64 * 3, 64, - dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); -} - -void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); - int intermediate_height = h + 7; - - assert(y_step_q4 == 16); - assert(x_step_q4 == 16); - - /* This implementation has the same issues as above. In addition, we only want - * to average the values after both passes. - */ - vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, - temp, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, intermediate_height); - vpx_convolve8_avg_vert_neon(temp + 64 * 3, - 64, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); -} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader.c b/thirdparty/libvpx/vpx_dsp/bitreader.c deleted file mode 100644 index 8140e78e70..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include - -#include "./vpx_config.h" - -#include "vpx_dsp/bitreader.h" -#include "vpx_dsp/prob.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_util/endian_inl.h" - -int vpx_reader_init(vpx_reader *r, - const uint8_t *buffer, - size_t size, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - if (size && !buffer) { - return 1; - } else { - r->buffer_end = buffer + size; - r->buffer = buffer; - r->value = 0; - r->count = -8; - r->range = 255; - r->decrypt_cb = decrypt_cb; - r->decrypt_state = decrypt_state; - vpx_reader_fill(r); - return vpx_read_bit(r) != 0; // marker bit - } -} - -void vpx_reader_fill(vpx_reader *r) { - const uint8_t *const buffer_end = r->buffer_end; - const uint8_t *buffer = r->buffer; - const uint8_t *buffer_start = buffer; - BD_VALUE value = r->value; - int count = r->count; - const size_t bytes_left = buffer_end - buffer; - const size_t bits_left = bytes_left * CHAR_BIT; - int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); - - if (r->decrypt_cb) { - size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left); - r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n); - buffer = r->clear_buffer; - buffer_start = r->clear_buffer; - } - if (bits_left > BD_VALUE_SIZE) { - const int bits = (shift & 0xfffffff8) + CHAR_BIT; - BD_VALUE nv; - BD_VALUE big_endian_values; - memcpy(&big_endian_values, buffer, sizeof(BD_VALUE)); -#if SIZE_MAX == 0xffffffffffffffffULL - big_endian_values = HToBE64(big_endian_values); -#else - big_endian_values = HToBE32(big_endian_values); -#endif - nv = big_endian_values >> (BD_VALUE_SIZE - bits); - count += bits; - buffer += (bits >> 3); - value = r->value | (nv << (shift & 0x7)); - } else { - const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left); - int loop_end = 0; - if (bits_over >= 0) { - count += LOTS_OF_BITS; - loop_end = bits_over; - } - - if (bits_over < 0 || bits_left) { - while (shift >= loop_end) { - count += CHAR_BIT; - value |= (BD_VALUE)*buffer++ << shift; - shift -= CHAR_BIT; - } - } - } - - // NOTE: Variable 'buffer' may not relate to 'r->buffer' after decryption, - // so we increase 'r->buffer' by the amount that 'buffer' moved, rather than - // assign 'buffer' to 'r->buffer'. - r->buffer += buffer - buffer_start; - r->value = value; - r->count = count; -} - -const uint8_t *vpx_reader_find_end(vpx_reader *r) { - // Find the end of the coded buffer - while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) { - r->count -= CHAR_BIT; - r->buffer--; - } - return r->buffer; -} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader.h b/thirdparty/libvpx/vpx_dsp/bitreader.h deleted file mode 100644 index 9a441b4107..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_BITREADER_H_ -#define VPX_DSP_BITREADER_H_ - -#include -#include - -#include "./vpx_config.h" -#include "vpx_ports/mem.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef size_t BD_VALUE; - -#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT) - -// This is meant to be a large, positive constant that can still be efficiently -// loaded as an immediate (on platforms like ARM, for example). -// Even relatively modest values like 100 would work fine. -#define LOTS_OF_BITS 0x40000000 - -typedef struct { - // Be careful when reordering this struct, it may impact the cache negatively. - BD_VALUE value; - unsigned int range; - int count; - const uint8_t *buffer_end; - const uint8_t *buffer; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - uint8_t clear_buffer[sizeof(BD_VALUE) + 1]; -} vpx_reader; - -int vpx_reader_init(vpx_reader *r, - const uint8_t *buffer, - size_t size, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -void vpx_reader_fill(vpx_reader *r); - -const uint8_t *vpx_reader_find_end(vpx_reader *r); - -static INLINE int vpx_reader_has_error(vpx_reader *r) { - // Check if we have reached the end of the buffer. - // - // Variable 'count' stores the number of bits in the 'value' buffer, minus - // 8. The top byte is part of the algorithm, and the remainder is buffered - // to be shifted into it. So if count == 8, the top 16 bits of 'value' are - // occupied, 8 for the algorithm and 8 in the buffer. - // - // When reading a byte from the user's buffer, count is filled with 8 and - // one byte is filled into the value buffer. When we reach the end of the - // data, count is additionally filled with LOTS_OF_BITS. So when - // count == LOTS_OF_BITS - 1, the user's data has been exhausted. - // - // 1 if we have tried to decode bits after the end of stream was encountered. - // 0 No error. - return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS; -} - -static INLINE int vpx_read(vpx_reader *r, int prob) { - unsigned int bit = 0; - BD_VALUE value; - BD_VALUE bigsplit; - int count; - unsigned int range; - unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT; - - if (r->count < 0) - vpx_reader_fill(r); - - value = r->value; - count = r->count; - - bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); - - range = split; - - if (value >= bigsplit) { - range = r->range - split; - value = value - bigsplit; - bit = 1; - } - - { - register int shift = vpx_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - } - r->value = value; - r->count = count; - r->range = range; - - return bit; -} - -static INLINE int vpx_read_bit(vpx_reader *r) { - return vpx_read(r, 128); // vpx_prob_half -} - -static INLINE int vpx_read_literal(vpx_reader *r, int bits) { - int literal = 0, bit; - - for (bit = bits - 1; bit >= 0; bit--) - literal |= vpx_read_bit(r) << bit; - - return literal; -} - -static INLINE int vpx_read_tree(vpx_reader *r, const vpx_tree_index *tree, - const vpx_prob *probs) { - vpx_tree_index i = 0; - - while ((i = tree[i + vpx_read(r, probs[i >> 1])]) > 0) - continue; - - return -i; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_BITREADER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c deleted file mode 100644 index d7b55cf9f4..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" -#include "./bitreader_buffer.h" - -size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) { - return (rb->bit_offset + 7) >> 3; -} - -int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb) { - const size_t off = rb->bit_offset; - const size_t p = off >> 3; - const int q = 7 - (int)(off & 0x7); - if (rb->bit_buffer + p < rb->bit_buffer_end) { - const int bit = (rb->bit_buffer[p] >> q) & 1; - rb->bit_offset = off + 1; - return bit; - } else { - rb->error_handler(rb->error_handler_data); - return 0; - } -} - -int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits) { - int value = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - value |= vpx_rb_read_bit(rb) << bit; - return value; -} - -int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, - int bits) { - const int value = vpx_rb_read_literal(rb, bits); - return vpx_rb_read_bit(rb) ? -value : value; -} - -int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, - int bits) { -#if CONFIG_MISC_FIXES - const int nbits = sizeof(unsigned) * 8 - bits - 1; - const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits; - return ((int) value) >> nbits; -#else - return vpx_rb_read_signed_literal(rb, bits); -#endif -} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h deleted file mode 100644 index 8a48a95ed1..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_BITREADER_BUFFER_H_ -#define VPX_DSP_BITREADER_BUFFER_H_ - -#include - -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*vpx_rb_error_handler)(void *data); - -struct vpx_read_bit_buffer { - const uint8_t *bit_buffer; - const uint8_t *bit_buffer_end; - size_t bit_offset; - - void *error_handler_data; - vpx_rb_error_handler error_handler; -}; - -size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb); - -int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb); - -int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits); - -int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits); - -int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_BITREADER_BUFFER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/intrapred.c b/thirdparty/libvpx/vpx_dsp/intrapred.c deleted file mode 100644 index cc4a74bd26..0000000000 --- a/thirdparty/libvpx/vpx_dsp/intrapred.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" - -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" - -#define DST(x, y) dst[(x) + (y) * stride] -#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) -#define AVG2(a, b) (((a) + (b) + 1) >> 1) - -static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) above; - // first column - for (r = 0; r < bs - 1; ++r) - dst[r * stride] = AVG2(left[r], left[r + 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // second column - for (r = 0; r < bs - 2; ++r) - dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); - dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // rest of last row - for (c = 0; c < bs - 2; ++c) - dst[(bs - 1) * stride + c] = left[bs - 1]; - - for (r = bs - 2; r >= 0; --r) - for (c = 0; c < bs - 2; ++c) - dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; -} - -#if CONFIG_MISC_FIXES -static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) above; - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], - left[(c >> 1) + r + 2]) - : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - int size; - (void)left; - for (c = 0; c < bs; ++c) { - dst[c] = AVG2(above[c], above[c + 1]); - dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); - } - for (r = 2, size = bs - 2; r < bs; r += 2, --size) { - memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); - memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); - memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); - memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); - } -} - -#if CONFIG_MISC_FIXES -static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - const uint8_t above_right = above[bs - 1]; - const uint8_t *const dst_row0 = dst; - int x, size; - (void)left; - - for (x = 0; x < bs - 1; ++x) { - dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); - } - dst[bs - 1] = above_right; - dst += stride; - for (x = 1, size = bs - 2; x < bs; ++x, --size) { - memcpy(dst, dst_row0 + x, size); - memset(dst + size, above_right, x + 1); - dst += stride; - } -} - -#if CONFIG_MISC_FIXES -static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = AVG3(above[r + c], above[r + c + 1], - above[r + c + 1 + (r + c + 2 < bs * 2)]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - - // first row - for (c = 0; c < bs; c++) - dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} - -static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i; -#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 - // silence a spurious -Warray-bounds warning, possibly related to: - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 - uint8_t border[69]; -#else - uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right -#endif - - // dst(bs, bs - 2)[0], i.e., border starting at bottom-left - for (i = 0; i < bs - 2; ++i) { - border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); - } - border[bs - 2] = AVG3(above[-1], left[0], left[1]); - border[bs - 1] = AVG3(left[0], above[-1], above[0]); - border[bs - 0] = AVG3(above[-1], above[0], above[1]); - // dst[0][2, size), i.e., remaining top border ascending - for (i = 0; i < bs - 2; ++i) { - border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); - } - - for (i = 0; i < bs; ++i) { - memcpy(dst + i * stride, border + bs - 1 - i, bs); - } -} - -static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bs; r++) - dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bs - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bs; ++r) { - for (c = 0; c < bs - 2; c++) - dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} - -static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) left; - - for (r = 0; r < bs; r++) { - memcpy(dst, above, bs); - dst += stride; - } -} - -static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) above; - - for (r = 0; r < bs; r++) { - memset(dst, left[r], bs); - dst += stride; - } -} - -static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - int ytop_left = above[-1]; - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) - dst[c] = clip_pixel(left[r] + above[c] - ytop_left); - dst += stride; - } -} - -static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) above; - (void) left; - - for (r = 0; r < bs; r++) { - memset(dst, 128, bs); - dst += stride; - } -} - -static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, - const uint8_t *left) { - int i, r, expected_dc, sum = 0; - (void) above; - - for (i = 0; i < bs; i++) - sum += left[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i, r, expected_dc, sum = 0; - (void) left; - - for (i = 0; i < bs; i++) - sum += above[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i, r, expected_dc, sum = 0; - const int count = 2 * bs; - - for (i = 0; i < bs; i++) { - sum += above[i]; - sum += left[i]; - } - - expected_dc = (sum + (count >> 1)) / count; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int H = above[-1]; - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - - memset(dst + stride * 0, AVG3(H, I, J), 4); - memset(dst + stride * 1, AVG3(I, J, K), 4); - memset(dst + stride * 2, AVG3(J, K, L), 4); - memset(dst + stride * 3, AVG3(K, L, L), 4); -} - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int H = above[-1]; - const int I = above[0]; - const int J = above[1]; - const int K = above[2]; - const int L = above[3]; - const int M = above[4]; - (void)left; - - dst[0] = AVG3(H, I, J); - dst[1] = AVG3(I, J, K); - dst[2] = AVG3(J, K, L); - dst[3] = AVG3(K, L, M); - memcpy(dst + stride * 1, dst, 4); - memcpy(dst + stride * 2, dst, 4); - memcpy(dst + stride * 3, dst, 4); -} - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - (void)above; - DST(0, 0) = AVG2(I, J); - DST(2, 0) = DST(0, 1) = AVG2(J, K); - DST(2, 1) = DST(0, 2) = AVG2(K, L); - DST(1, 0) = AVG3(I, J, K); - DST(3, 0) = DST(1, 1) = AVG3(J, K, L); - DST(3, 1) = DST(1, 2) = AVG3(K, L, L); - DST(3, 2) = DST(2, 2) = - DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; -} - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - (void)left; - DST(0, 0) = AVG2(A, B); - DST(1, 0) = DST(0, 2) = AVG2(B, C); - DST(2, 0) = DST(1, 2) = AVG2(C, D); - DST(3, 0) = DST(2, 2) = AVG2(D, E); - DST(3, 2) = AVG2(E, F); // differs from vp8 - - DST(0, 1) = AVG3(A, B, C); - DST(1, 1) = DST(0, 3) = AVG3(B, C, D); - DST(2, 1) = DST(1, 3) = AVG3(C, D, E); - DST(3, 1) = DST(2, 3) = AVG3(D, E, F); - DST(3, 3) = AVG3(E, F, G); // differs from vp8 -} - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)left; - DST(0, 0) = AVG2(A, B); - DST(1, 0) = DST(0, 2) = AVG2(B, C); - DST(2, 0) = DST(1, 2) = AVG2(C, D); - DST(3, 0) = DST(2, 2) = AVG2(D, E); - DST(3, 2) = AVG3(E, F, G); - - DST(0, 1) = AVG3(A, B, C); - DST(1, 1) = DST(0, 3) = AVG3(B, C, D); - DST(2, 1) = DST(1, 3) = AVG3(C, D, E); - DST(3, 1) = DST(2, 3) = AVG3(D, E, F); - DST(3, 3) = AVG3(F, G, H); -} - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)stride; - (void)left; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); - DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); - DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); - DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = H; // differs from vp8 -} - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)stride; - (void)left; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); - DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); - DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); - DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = AVG3(G, H, H); -} - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - DST(0, 0) = DST(1, 2) = AVG2(X, A); - DST(1, 0) = DST(2, 2) = AVG2(A, B); - DST(2, 0) = DST(3, 2) = AVG2(B, C); - DST(3, 0) = AVG2(C, D); - - DST(0, 3) = AVG3(K, J, I); - DST(0, 2) = AVG3(J, I, X); - DST(0, 1) = DST(1, 3) = AVG3(I, X, A); - DST(1, 1) = DST(2, 3) = AVG3(X, A, B); - DST(2, 1) = DST(3, 3) = AVG3(A, B, C); - DST(3, 1) = AVG3(B, C, D); -} - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)stride; - DST(0, 3) = AVG3(J, K, L); - DST(1, 3) = DST(0, 2) = AVG3(I, J, K); - DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); - DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); - DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); - DST(3, 1) = DST(2, 0) = AVG3(C, B, A); - DST(3, 0) = AVG3(D, C, B); -} - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - - DST(0, 0) = DST(2, 1) = AVG2(I, X); - DST(0, 1) = DST(2, 2) = AVG2(J, I); - DST(0, 2) = DST(2, 3) = AVG2(K, J); - DST(0, 3) = AVG2(L, K); - - DST(3, 0) = AVG3(A, B, C); - DST(2, 0) = AVG3(X, A, B); - DST(1, 0) = DST(3, 1) = AVG3(I, X, A); - DST(1, 1) = DST(3, 2) = AVG3(J, I, X); - DST(1, 2) = DST(3, 3) = AVG3(K, J, I); - DST(1, 3) = AVG3(L, K, J); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) above; - (void) bd; - - // First column. - for (r = 0; r < bs - 1; ++r) { - dst[r * stride] = AVG2(left[r], left[r + 1]); - } - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // Second column. - for (r = 0; r < bs - 2; ++r) { - dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); - } - dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // Rest of last row. - for (c = 0; c < bs - 2; ++c) - dst[(bs - 1) * stride + c] = left[bs - 1]; - - for (r = bs - 2; r >= 0; --r) { - for (c = 0; c < bs - 2; ++c) - dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; - } -} - -#if CONFIG_MISC_FIXES -static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) above; - (void) bd; - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], - left[(c >> 1) + r + 2]) - : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } -} - -#define highbd_d63e_predictor highbd_d63_predictor - -static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, - const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r + c + 2 < bs * 2 ? AVG3(above[r + c], above[r + c + 1], - above[r + c + 2]) - : above[bs * 2 - 1]; - } - dst += stride; - } -} - -#if CONFIG_MISC_FIXES -static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = AVG3(above[r + c], above[r + c + 1], - above[r + c + 1 + (r + c + 2 < bs * 2)]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - - // first row - for (c = 0; c < bs; c++) - dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} - -static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; ++r) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - - dst += stride; - for (r = 1; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-stride + c - 1]; - dst += stride; - } -} - -static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bs; r++) - dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bs - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bs; ++r) { - for (c = 0; c < bs - 2; c++) - dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} - -static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) left; - (void) bd; - for (r = 0; r < bs; r++) { - memcpy(dst, above, bs * sizeof(uint16_t)); - dst += stride; - } -} - -static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) above; - (void) bd; - for (r = 0; r < bs; r++) { - vpx_memset16(dst, left[r], bs); - dst += stride; - } -} - -static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - int ytop_left = above[-1]; - (void) bd; - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) - dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd); - dst += stride; - } -} - -static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) above; - (void) left; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, 128 << (bd - 8), bs); - dst += stride; - } -} - -static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - (void) above; - (void) bd; - - for (i = 0; i < bs; i++) - sum += left[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - (void) left; - (void) bd; - - for (i = 0; i < bs; i++) - sum += above[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - const int count = 2 * bs; - (void) bd; - - for (i = 0; i < bs; i++) { - sum += above[i]; - sum += left[i]; - } - - expected_dc = (sum + (count >> 1)) / count; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -// This serves as a wrapper function, so that all the prediction functions -// can be unified and accessed as a pointer array. Note that the boundary -// above and left are not necessarily used all the time. -#define intra_pred_sized(type, size) \ - void vpx_##type##_predictor_##size##x##size##_c(uint8_t *dst, \ - ptrdiff_t stride, \ - const uint8_t *above, \ - const uint8_t *left) { \ - type##_predictor(dst, stride, size, above, left); \ - } - -#if CONFIG_VP9_HIGHBITDEPTH -#define intra_pred_highbd_sized(type, size) \ - void vpx_highbd_##type##_predictor_##size##x##size##_c( \ - uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \ - const uint16_t *left, int bd) { \ - highbd_##type##_predictor(dst, stride, size, above, left, bd); \ - } - -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 4) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) \ - intra_pred_highbd_sized(type, 4) \ - intra_pred_highbd_sized(type, 8) \ - intra_pred_highbd_sized(type, 16) \ - intra_pred_highbd_sized(type, 32) - -#define intra_pred_no_4x4(type) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) \ - intra_pred_highbd_sized(type, 4) \ - intra_pred_highbd_sized(type, 8) \ - intra_pred_highbd_sized(type, 16) \ - intra_pred_highbd_sized(type, 32) - -#else -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 4) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) - -#define intra_pred_no_4x4(type) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) -#endif // CONFIG_VP9_HIGHBITDEPTH - -intra_pred_no_4x4(d207) -intra_pred_no_4x4(d63) -intra_pred_no_4x4(d45) -#if CONFIG_MISC_FIXES -intra_pred_allsizes(d207e) -intra_pred_allsizes(d63e) -intra_pred_no_4x4(d45e) -#endif -intra_pred_no_4x4(d117) -intra_pred_no_4x4(d135) -intra_pred_no_4x4(d153) -intra_pred_allsizes(v) -intra_pred_allsizes(h) -intra_pred_allsizes(tm) -intra_pred_allsizes(dc_128) -intra_pred_allsizes(dc_left) -intra_pred_allsizes(dc_top) -intra_pred_allsizes(dc) -#undef intra_pred_allsizes diff --git a/thirdparty/libvpx/vpx_dsp/inv_txfm.c b/thirdparty/libvpx/vpx_dsp/inv_txfm.c deleted file mode 100644 index e18d31d7aa..0000000000 --- a/thirdparty/libvpx/vpx_dsp/inv_txfm.c +++ /dev/null @@ -1,2518 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/inv_txfm.h" - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { -/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, - 0.5 shifts per pixel. */ - int i; - tran_low_t output[16]; - tran_high_t a1, b1, c1, d1, e1; - const tran_low_t *ip = input; - tran_low_t *op = output; - - for (i = 0; i < 4; i++) { - a1 = ip[0] >> UNIT_QUANT_SHIFT; - c1 = ip[1] >> UNIT_QUANT_SHIFT; - d1 = ip[2] >> UNIT_QUANT_SHIFT; - b1 = ip[3] >> UNIT_QUANT_SHIFT; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - op[0] = WRAPLOW(a1); - op[1] = WRAPLOW(b1); - op[2] = WRAPLOW(c1); - op[3] = WRAPLOW(d1); - ip += 4; - op += 4; - } - - ip = output; - for (i = 0; i < 4; i++) { - a1 = ip[4 * 0]; - c1 = ip[4 * 1]; - d1 = ip[4 * 2]; - b1 = ip[4 * 3]; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1)); - dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1)); - dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1)); - dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1)); - - ip++; - dest++; - } -} - -void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { - int i; - tran_high_t a1, e1; - tran_low_t tmp[4]; - const tran_low_t *ip = in; - tran_low_t *op = tmp; - - a1 = ip[0] >> UNIT_QUANT_SHIFT; - e1 = a1 >> 1; - a1 -= e1; - op[0] = WRAPLOW(a1); - op[1] = op[2] = op[3] = WRAPLOW(e1); - - ip = tmp; - for (i = 0; i < 4; i++) { - e1 = ip[0] >> 1; - a1 = ip[0] - e1; - dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1); - dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1); - dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1); - dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1); - ip++; - dest++; - } -} - -void idct4_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step[4]; - tran_high_t temp1, temp2; - // stage 1 - temp1 = (input[0] + input[2]) * cospi_16_64; - temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = WRAPLOW(dct_const_round_shift(temp1)); - step[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; - temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = WRAPLOW(dct_const_round_shift(temp1)); - step[3] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 2 - output[0] = WRAPLOW(step[0] + step[3]); - output[1] = WRAPLOW(step[1] + step[2]); - output[2] = WRAPLOW(step[1] - step[2]); - output[3] = WRAPLOW(step[0] - step[3]); -} - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[4], temp_out[4]; - - // Rows - for (i = 0; i < 4; ++i) { - idct4_c(input, outptr); - input += 4; - outptr += 4; - } - - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - idct4_c(temp_in, temp_out); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 4)); - } - } -} - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, - int dest_stride) { - int i; - tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 4); - - for (i = 0; i < 4; i++) { - dest[0] = clip_pixel_add(dest[0], a1); - dest[1] = clip_pixel_add(dest[1], a1); - dest[2] = clip_pixel_add(dest[2], a1); - dest[3] = clip_pixel_add(dest[3], a1); - dest += dest_stride; - } -} - -void idct8_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[8], step2[8]; - tran_high_t temp1, temp2; - // stage 1 - step1[0] = input[0]; - step1[2] = input[4]; - step1[1] = input[2]; - step1[3] = input[6]; - temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; - temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1)); - step1[7] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; - temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 2 - temp1 = (step1[0] + step1[2]) * cospi_16_64; - temp2 = (step1[0] - step1[2]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1)); - step2[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1)); - step2[3] = WRAPLOW(dct_const_round_shift(temp2)); - step2[4] = WRAPLOW(step1[4] + step1[5]); - step2[5] = WRAPLOW(step1[4] - step1[5]); - step2[6] = WRAPLOW(-step1[6] + step1[7]); - step2[7] = WRAPLOW(step1[6] + step1[7]); - - // stage 3 - step1[0] = WRAPLOW(step2[0] + step2[3]); - step1[1] = WRAPLOW(step2[1] + step2[2]); - step1[2] = WRAPLOW(step2[1] - step2[2]); - step1[3] = WRAPLOW(step2[0] - step2[3]); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - step1[7] = step2[7]; - - // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7]); - output[1] = WRAPLOW(step1[1] + step1[6]); - output[2] = WRAPLOW(step1[2] + step1[5]); - output[3] = WRAPLOW(step1[3] + step1[4]); - output[4] = WRAPLOW(step1[3] - step1[4]); - output[5] = WRAPLOW(step1[2] - step1[5]); - output[6] = WRAPLOW(step1[1] - step1[6]); - output[7] = WRAPLOW(step1[0] - step1[7]); -} - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - - // First transform rows - for (i = 0; i < 8; ++i) { - idct8_c(input, outptr); - input += 8; - outptr += 8; - } - - // Then transform columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - idct8_c(temp_in, temp_out); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); - } - } -} - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - int i, j; - tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 5); - for (j = 0; j < 8; ++j) { - for (i = 0; i < 8; ++i) - dest[i] = clip_pixel_add(dest[i], a1); - dest += stride; - } -} - -void iadst4_c(const tran_low_t *input, tran_low_t *output) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - - tran_low_t x0 = input[0]; - tran_low_t x1 = input[1]; - tran_low_t x2 = input[2]; - tran_low_t x3 = input[3]; - - if (!(x0 | x1 | x2 | x3)) { - output[0] = output[1] = output[2] = output[3] = 0; - return; - } - - s0 = sinpi_1_9 * x0; - s1 = sinpi_2_9 * x0; - s2 = sinpi_3_9 * x1; - s3 = sinpi_4_9 * x2; - s4 = sinpi_1_9 * x2; - s5 = sinpi_2_9 * x3; - s6 = sinpi_4_9 * x3; - s7 = WRAPLOW(x0 - x2 + x3); - - s0 = s0 + s3 + s5; - s1 = s1 - s4 - s6; - s3 = s2; - s2 = sinpi_3_9 * s7; - - // 1-D transform scaling factor is sqrt(2). - // The overall dynamic range is 14b (input) + 14b (multiplication scaling) - // + 1b (addition) = 29b. - // Hence the output bit depth is 15b. - output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); - output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); - output[2] = WRAPLOW(dct_const_round_shift(s2)); - output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); -} - -void iadst8_c(const tran_low_t *input, tran_low_t *output) { - int s0, s1, s2, s3, s4, s5, s6, s7; - - tran_high_t x0 = input[7]; - tran_high_t x1 = input[0]; - tran_high_t x2 = input[5]; - tran_high_t x3 = input[2]; - tran_high_t x4 = input[3]; - tran_high_t x5 = input[4]; - tran_high_t x6 = input[1]; - tran_high_t x7 = input[6]; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { - output[0] = output[1] = output[2] = output[3] = output[4] - = output[5] = output[6] = output[7] = 0; - return; - } - - // stage 1 - s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); - s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); - s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); - s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); - s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); - s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); - s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); - s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); - - x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); - x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); - x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); - x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); - x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); - x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); - x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); - x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); - - // stage 2 - s0 = (int)x0; - s1 = (int)x1; - s2 = (int)x2; - s3 = (int)x3; - s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); - s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); - s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); - s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); - - x0 = WRAPLOW(s0 + s2); - x1 = WRAPLOW(s1 + s3); - x2 = WRAPLOW(s0 - s2); - x3 = WRAPLOW(s1 - s3); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); - - // stage 3 - s2 = (int)(cospi_16_64 * (x2 + x3)); - s3 = (int)(cospi_16_64 * (x2 - x3)); - s6 = (int)(cospi_16_64 * (x6 + x7)); - s7 = (int)(cospi_16_64 * (x6 - x7)); - - x2 = WRAPLOW(dct_const_round_shift(s2)); - x3 = WRAPLOW(dct_const_round_shift(s3)); - x6 = WRAPLOW(dct_const_round_shift(s6)); - x7 = WRAPLOW(dct_const_round_shift(s7)); - - output[0] = WRAPLOW(x0); - output[1] = WRAPLOW(-x4); - output[2] = WRAPLOW(x6); - output[3] = WRAPLOW(-x2); - output[4] = WRAPLOW(x3); - output[5] = WRAPLOW(-x7); - output[6] = WRAPLOW(x5); - output[7] = WRAPLOW(-x1); -} - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - tran_low_t out[8 * 8] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - - // First transform rows - // only first 4 row has non-zero coefs - for (i = 0; i < 4; ++i) { - idct8_c(input, outptr); - input += 8; - outptr += 8; - } - - // Then transform columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - idct8_c(temp_in, temp_out); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); - } - } -} - -void idct16_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[16], step2[16]; - tran_high_t temp1, temp2; - - // stage 1 - step1[0] = input[0/2]; - step1[1] = input[16/2]; - step1[2] = input[8/2]; - step1[3] = input[24/2]; - step1[4] = input[4/2]; - step1[5] = input[20/2]; - step1[6] = input[12/2]; - step1[7] = input[28/2]; - step1[8] = input[2/2]; - step1[9] = input[18/2]; - step1[10] = input[10/2]; - step1[11] = input[26/2]; - step1[12] = input[6/2]; - step1[13] = input[22/2]; - step1[14] = input[14/2]; - step1[15] = input[30/2]; - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1)); - step2[15] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1)); - step1[7] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - - step1[8] = WRAPLOW(step2[8] + step2[9]); - step1[9] = WRAPLOW(step2[8] - step2[9]); - step1[10] = WRAPLOW(-step2[10] + step2[11]); - step1[11] = WRAPLOW(step2[10] + step2[11]); - step1[12] = WRAPLOW(step2[12] + step2[13]); - step1[13] = WRAPLOW(step2[12] - step2[13]); - step1[14] = WRAPLOW(-step2[14] + step2[15]); - step1[15] = WRAPLOW(step2[14] + step2[15]); - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1)); - step2[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1)); - step2[3] = WRAPLOW(dct_const_round_shift(temp2)); - step2[4] = WRAPLOW(step1[4] + step1[5]); - step2[5] = WRAPLOW(step1[4] - step1[5]); - step2[6] = WRAPLOW(-step1[6] + step1[7]); - step2[7] = WRAPLOW(step1[6] + step1[7]); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - step2[11] = step1[11]; - step2[12] = step1[12]; - - // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3]); - step1[1] = WRAPLOW(step2[1] + step2[2]); - step1[2] = WRAPLOW(step2[1] - step2[2]); - step1[3] = WRAPLOW(step2[0] - step2[3]); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - step1[7] = step2[7]; - - step1[8] = WRAPLOW(step2[8] + step2[11]); - step1[9] = WRAPLOW(step2[9] + step2[10]); - step1[10] = WRAPLOW(step2[9] - step2[10]); - step1[11] = WRAPLOW(step2[8] - step2[11]); - step1[12] = WRAPLOW(-step2[12] + step2[15]); - step1[13] = WRAPLOW(-step2[13] + step2[14]); - step1[14] = WRAPLOW(step2[13] + step2[14]); - step1[15] = WRAPLOW(step2[12] + step2[15]); - - // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7]); - step2[1] = WRAPLOW(step1[1] + step1[6]); - step2[2] = WRAPLOW(step1[2] + step1[5]); - step2[3] = WRAPLOW(step1[3] + step1[4]); - step2[4] = WRAPLOW(step1[3] - step1[4]); - step2[5] = WRAPLOW(step1[2] - step1[5]); - step2[6] = WRAPLOW(step1[1] - step1[6]); - step2[7] = WRAPLOW(step1[0] - step1[7]); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15]); - output[1] = WRAPLOW(step2[1] + step2[14]); - output[2] = WRAPLOW(step2[2] + step2[13]); - output[3] = WRAPLOW(step2[3] + step2[12]); - output[4] = WRAPLOW(step2[4] + step2[11]); - output[5] = WRAPLOW(step2[5] + step2[10]); - output[6] = WRAPLOW(step2[6] + step2[9]); - output[7] = WRAPLOW(step2[7] + step2[8]); - output[8] = WRAPLOW(step2[7] - step2[8]); - output[9] = WRAPLOW(step2[6] - step2[9]); - output[10] = WRAPLOW(step2[5] - step2[10]); - output[11] = WRAPLOW(step2[4] - step2[11]); - output[12] = WRAPLOW(step2[3] - step2[12]); - output[13] = WRAPLOW(step2[2] - step2[13]); - output[14] = WRAPLOW(step2[1] - step2[14]); - output[15] = WRAPLOW(step2[0] - step2[15]); -} - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - - // First transform rows - for (i = 0; i < 16; ++i) { - idct16_c(input, outptr); - input += 16; - outptr += 16; - } - - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - idct16_c(temp_in, temp_out); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void iadst16_c(const tran_low_t *input, tran_low_t *output) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; - tran_high_t s9, s10, s11, s12, s13, s14, s15; - - tran_high_t x0 = input[15]; - tran_high_t x1 = input[0]; - tran_high_t x2 = input[13]; - tran_high_t x3 = input[2]; - tran_high_t x4 = input[11]; - tran_high_t x5 = input[4]; - tran_high_t x6 = input[9]; - tran_high_t x7 = input[6]; - tran_high_t x8 = input[7]; - tran_high_t x9 = input[8]; - tran_high_t x10 = input[5]; - tran_high_t x11 = input[10]; - tran_high_t x12 = input[3]; - tran_high_t x13 = input[12]; - tran_high_t x14 = input[1]; - tran_high_t x15 = input[14]; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 - | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { - output[0] = output[1] = output[2] = output[3] = output[4] - = output[5] = output[6] = output[7] = output[8] - = output[9] = output[10] = output[11] = output[12] - = output[13] = output[14] = output[15] = 0; - return; - } - - // stage 1 - s0 = x0 * cospi_1_64 + x1 * cospi_31_64; - s1 = x0 * cospi_31_64 - x1 * cospi_1_64; - s2 = x2 * cospi_5_64 + x3 * cospi_27_64; - s3 = x2 * cospi_27_64 - x3 * cospi_5_64; - s4 = x4 * cospi_9_64 + x5 * cospi_23_64; - s5 = x4 * cospi_23_64 - x5 * cospi_9_64; - s6 = x6 * cospi_13_64 + x7 * cospi_19_64; - s7 = x6 * cospi_19_64 - x7 * cospi_13_64; - s8 = x8 * cospi_17_64 + x9 * cospi_15_64; - s9 = x8 * cospi_15_64 - x9 * cospi_17_64; - s10 = x10 * cospi_21_64 + x11 * cospi_11_64; - s11 = x10 * cospi_11_64 - x11 * cospi_21_64; - s12 = x12 * cospi_25_64 + x13 * cospi_7_64; - s13 = x12 * cospi_7_64 - x13 * cospi_25_64; - s14 = x14 * cospi_29_64 + x15 * cospi_3_64; - s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - - x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); - x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); - x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); - x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); - x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); - x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); - x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); - x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); - x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); - x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); - x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); - x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); - x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); - x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); - x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); - x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); - - // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4; - s5 = x5; - s6 = x6; - s7 = x7; - s8 = x8 * cospi_4_64 + x9 * cospi_28_64; - s9 = x8 * cospi_28_64 - x9 * cospi_4_64; - s10 = x10 * cospi_20_64 + x11 * cospi_12_64; - s11 = x10 * cospi_12_64 - x11 * cospi_20_64; - s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; - s13 = x12 * cospi_4_64 + x13 * cospi_28_64; - s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; - s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - - x0 = WRAPLOW(s0 + s4); - x1 = WRAPLOW(s1 + s5); - x2 = WRAPLOW(s2 + s6); - x3 = WRAPLOW(s3 + s7); - x4 = WRAPLOW(s0 - s4); - x5 = WRAPLOW(s1 - s5); - x6 = WRAPLOW(s2 - s6); - x7 = WRAPLOW(s3 - s7); - x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); - x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); - x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); - x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); - x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); - x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); - x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); - x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); - - // stage 3 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4 * cospi_8_64 + x5 * cospi_24_64; - s5 = x4 * cospi_24_64 - x5 * cospi_8_64; - s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; - s7 = x6 * cospi_8_64 + x7 * cospi_24_64; - s8 = x8; - s9 = x9; - s10 = x10; - s11 = x11; - s12 = x12 * cospi_8_64 + x13 * cospi_24_64; - s13 = x12 * cospi_24_64 - x13 * cospi_8_64; - s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; - s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - - x0 = WRAPLOW(s0 + s2); - x1 = WRAPLOW(s1 + s3); - x2 = WRAPLOW(s0 - s2); - x3 = WRAPLOW(s1 - s3); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); - x8 = WRAPLOW(s8 + s10); - x9 = WRAPLOW(s9 + s11); - x10 = WRAPLOW(s8 - s10); - x11 = WRAPLOW(s9 - s11); - x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); - x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); - x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); - x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); - - // stage 4 - s2 = (- cospi_16_64) * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (- x6 + x7); - s10 = cospi_16_64 * (x10 + x11); - s11 = cospi_16_64 * (- x10 + x11); - s14 = (- cospi_16_64) * (x14 + x15); - s15 = cospi_16_64 * (x14 - x15); - - x2 = WRAPLOW(dct_const_round_shift(s2)); - x3 = WRAPLOW(dct_const_round_shift(s3)); - x6 = WRAPLOW(dct_const_round_shift(s6)); - x7 = WRAPLOW(dct_const_round_shift(s7)); - x10 = WRAPLOW(dct_const_round_shift(s10)); - x11 = WRAPLOW(dct_const_round_shift(s11)); - x14 = WRAPLOW(dct_const_round_shift(s14)); - x15 = WRAPLOW(dct_const_round_shift(s15)); - - output[0] = WRAPLOW(x0); - output[1] = WRAPLOW(-x8); - output[2] = WRAPLOW(x12); - output[3] = WRAPLOW(-x4); - output[4] = WRAPLOW(x6); - output[5] = WRAPLOW(x14); - output[6] = WRAPLOW(x10); - output[7] = WRAPLOW(x2); - output[8] = WRAPLOW(x3); - output[9] = WRAPLOW(x11); - output[10] = WRAPLOW(x15); - output[11] = WRAPLOW(x7); - output[12] = WRAPLOW(x5); - output[13] = WRAPLOW(-x13); - output[14] = WRAPLOW(x9); - output[15] = WRAPLOW(-x1); -} - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[16 * 16] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - - // First transform rows. Since all non-zero dct coefficients are in - // upper-left 4x4 area, we only need to calculate first 4 rows here. - for (i = 0; i < 4; ++i) { - idct16_c(input, outptr); - input += 16; - outptr += 16; - } - - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - idct16_c(temp_in, temp_out); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - int i, j; - tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 6); - for (j = 0; j < 16; ++j) { - for (i = 0; i < 16; ++i) - dest[i] = clip_pixel_add(dest[i], a1); - dest += stride; - } -} - -void idct32_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[32], step2[32]; - tran_high_t temp1, temp2; - - // stage 1 - step1[0] = input[0]; - step1[1] = input[16]; - step1[2] = input[8]; - step1[3] = input[24]; - step1[4] = input[4]; - step1[5] = input[20]; - step1[6] = input[12]; - step1[7] = input[28]; - step1[8] = input[2]; - step1[9] = input[18]; - step1[10] = input[10]; - step1[11] = input[26]; - step1[12] = input[6]; - step1[13] = input[22]; - step1[14] = input[14]; - step1[15] = input[30]; - - temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; - temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = WRAPLOW(dct_const_round_shift(temp1)); - step1[31] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; - temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1)); - step1[30] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; - temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1)); - step1[29] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; - temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1)); - step1[28] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; - temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1)); - step1[27] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; - temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; - temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1)); - step1[25] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; - temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1)); - step1[24] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1)); - step2[15] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - - step2[16] = WRAPLOW(step1[16] + step1[17]); - step2[17] = WRAPLOW(step1[16] - step1[17]); - step2[18] = WRAPLOW(-step1[18] + step1[19]); - step2[19] = WRAPLOW(step1[18] + step1[19]); - step2[20] = WRAPLOW(step1[20] + step1[21]); - step2[21] = WRAPLOW(step1[20] - step1[21]); - step2[22] = WRAPLOW(-step1[22] + step1[23]); - step2[23] = WRAPLOW(step1[22] + step1[23]); - step2[24] = WRAPLOW(step1[24] + step1[25]); - step2[25] = WRAPLOW(step1[24] - step1[25]); - step2[26] = WRAPLOW(-step1[26] + step1[27]); - step2[27] = WRAPLOW(step1[26] + step1[27]); - step2[28] = WRAPLOW(step1[28] + step1[29]); - step2[29] = WRAPLOW(step1[28] - step1[29]); - step2[30] = WRAPLOW(-step1[30] + step1[31]); - step2[31] = WRAPLOW(step1[30] + step1[31]); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1)); - step1[7] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - - step1[8] = WRAPLOW(step2[8] + step2[9]); - step1[9] = WRAPLOW(step2[8] - step2[9]); - step1[10] = WRAPLOW(-step2[10] + step2[11]); - step1[11] = WRAPLOW(step2[10] + step2[11]); - step1[12] = WRAPLOW(step2[12] + step2[13]); - step1[13] = WRAPLOW(step2[12] - step2[13]); - step1[14] = WRAPLOW(-step2[14] + step2[15]); - step1[15] = WRAPLOW(step2[14] + step2[15]); - - step1[16] = step2[16]; - step1[31] = step2[31]; - temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; - temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1)); - step1[30] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; - temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1)); - step1[29] = WRAPLOW(dct_const_round_shift(temp2)); - step1[19] = step2[19]; - step1[20] = step2[20]; - temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; - temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; - temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1)); - step1[25] = WRAPLOW(dct_const_round_shift(temp2)); - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[27] = step2[27]; - step1[28] = step2[28]; - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1)); - step2[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1)); - step2[3] = WRAPLOW(dct_const_round_shift(temp2)); - step2[4] = WRAPLOW(step1[4] + step1[5]); - step2[5] = WRAPLOW(step1[4] - step1[5]); - step2[6] = WRAPLOW(-step1[6] + step1[7]); - step2[7] = WRAPLOW(step1[6] + step1[7]); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - step2[11] = step1[11]; - step2[12] = step1[12]; - - step2[16] = WRAPLOW(step1[16] + step1[19]); - step2[17] = WRAPLOW(step1[17] + step1[18]); - step2[18] = WRAPLOW(step1[17] - step1[18]); - step2[19] = WRAPLOW(step1[16] - step1[19]); - step2[20] = WRAPLOW(-step1[20] + step1[23]); - step2[21] = WRAPLOW(-step1[21] + step1[22]); - step2[22] = WRAPLOW(step1[21] + step1[22]); - step2[23] = WRAPLOW(step1[20] + step1[23]); - - step2[24] = WRAPLOW(step1[24] + step1[27]); - step2[25] = WRAPLOW(step1[25] + step1[26]); - step2[26] = WRAPLOW(step1[25] - step1[26]); - step2[27] = WRAPLOW(step1[24] - step1[27]); - step2[28] = WRAPLOW(-step1[28] + step1[31]); - step2[29] = WRAPLOW(-step1[29] + step1[30]); - step2[30] = WRAPLOW(step1[29] + step1[30]); - step2[31] = WRAPLOW(step1[28] + step1[31]); - - // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3]); - step1[1] = WRAPLOW(step2[1] + step2[2]); - step1[2] = WRAPLOW(step2[1] - step2[2]); - step1[3] = WRAPLOW(step2[0] - step2[3]); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - step1[7] = step2[7]; - - step1[8] = WRAPLOW(step2[8] + step2[11]); - step1[9] = WRAPLOW(step2[9] + step2[10]); - step1[10] = WRAPLOW(step2[9] - step2[10]); - step1[11] = WRAPLOW(step2[8] - step2[11]); - step1[12] = WRAPLOW(-step2[12] + step2[15]); - step1[13] = WRAPLOW(-step2[13] + step2[14]); - step1[14] = WRAPLOW(step2[13] + step2[14]); - step1[15] = WRAPLOW(step2[12] + step2[15]); - - step1[16] = step2[16]; - step1[17] = step2[17]; - temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; - temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1)); - step1[29] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; - temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1)); - step1[28] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; - temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1)); - step1[27] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; - temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - step1[22] = step2[22]; - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[25] = step2[25]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7]); - step2[1] = WRAPLOW(step1[1] + step1[6]); - step2[2] = WRAPLOW(step1[2] + step1[5]); - step2[3] = WRAPLOW(step1[3] + step1[4]); - step2[4] = WRAPLOW(step1[3] - step1[4]); - step2[5] = WRAPLOW(step1[2] - step1[5]); - step2[6] = WRAPLOW(step1[1] - step1[6]); - step2[7] = WRAPLOW(step1[0] - step1[7]); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - step2[14] = step1[14]; - step2[15] = step1[15]; - - step2[16] = WRAPLOW(step1[16] + step1[23]); - step2[17] = WRAPLOW(step1[17] + step1[22]); - step2[18] = WRAPLOW(step1[18] + step1[21]); - step2[19] = WRAPLOW(step1[19] + step1[20]); - step2[20] = WRAPLOW(step1[19] - step1[20]); - step2[21] = WRAPLOW(step1[18] - step1[21]); - step2[22] = WRAPLOW(step1[17] - step1[22]); - step2[23] = WRAPLOW(step1[16] - step1[23]); - - step2[24] = WRAPLOW(-step1[24] + step1[31]); - step2[25] = WRAPLOW(-step1[25] + step1[30]); - step2[26] = WRAPLOW(-step1[26] + step1[29]); - step2[27] = WRAPLOW(-step1[27] + step1[28]); - step2[28] = WRAPLOW(step1[27] + step1[28]); - step2[29] = WRAPLOW(step1[26] + step1[29]); - step2[30] = WRAPLOW(step1[25] + step1[30]); - step2[31] = WRAPLOW(step1[24] + step1[31]); - - // stage 7 - step1[0] = WRAPLOW(step2[0] + step2[15]); - step1[1] = WRAPLOW(step2[1] + step2[14]); - step1[2] = WRAPLOW(step2[2] + step2[13]); - step1[3] = WRAPLOW(step2[3] + step2[12]); - step1[4] = WRAPLOW(step2[4] + step2[11]); - step1[5] = WRAPLOW(step2[5] + step2[10]); - step1[6] = WRAPLOW(step2[6] + step2[9]); - step1[7] = WRAPLOW(step2[7] + step2[8]); - step1[8] = WRAPLOW(step2[7] - step2[8]); - step1[9] = WRAPLOW(step2[6] - step2[9]); - step1[10] = WRAPLOW(step2[5] - step2[10]); - step1[11] = WRAPLOW(step2[4] - step2[11]); - step1[12] = WRAPLOW(step2[3] - step2[12]); - step1[13] = WRAPLOW(step2[2] - step2[13]); - step1[14] = WRAPLOW(step2[1] - step2[14]); - step1[15] = WRAPLOW(step2[0] - step2[15]); - - step1[16] = step2[16]; - step1[17] = step2[17]; - step1[18] = step2[18]; - step1[19] = step2[19]; - temp1 = (-step2[20] + step2[27]) * cospi_16_64; - temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1)); - step1[27] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step2[21] + step2[26]) * cospi_16_64; - temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step2[22] + step2[25]) * cospi_16_64; - temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1)); - step1[25] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step2[23] + step2[24]) * cospi_16_64; - temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1)); - step1[24] = WRAPLOW(dct_const_round_shift(temp2)); - step1[28] = step2[28]; - step1[29] = step2[29]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // final stage - output[0] = WRAPLOW(step1[0] + step1[31]); - output[1] = WRAPLOW(step1[1] + step1[30]); - output[2] = WRAPLOW(step1[2] + step1[29]); - output[3] = WRAPLOW(step1[3] + step1[28]); - output[4] = WRAPLOW(step1[4] + step1[27]); - output[5] = WRAPLOW(step1[5] + step1[26]); - output[6] = WRAPLOW(step1[6] + step1[25]); - output[7] = WRAPLOW(step1[7] + step1[24]); - output[8] = WRAPLOW(step1[8] + step1[23]); - output[9] = WRAPLOW(step1[9] + step1[22]); - output[10] = WRAPLOW(step1[10] + step1[21]); - output[11] = WRAPLOW(step1[11] + step1[20]); - output[12] = WRAPLOW(step1[12] + step1[19]); - output[13] = WRAPLOW(step1[13] + step1[18]); - output[14] = WRAPLOW(step1[14] + step1[17]); - output[15] = WRAPLOW(step1[15] + step1[16]); - output[16] = WRAPLOW(step1[15] - step1[16]); - output[17] = WRAPLOW(step1[14] - step1[17]); - output[18] = WRAPLOW(step1[13] - step1[18]); - output[19] = WRAPLOW(step1[12] - step1[19]); - output[20] = WRAPLOW(step1[11] - step1[20]); - output[21] = WRAPLOW(step1[10] - step1[21]); - output[22] = WRAPLOW(step1[9] - step1[22]); - output[23] = WRAPLOW(step1[8] - step1[23]); - output[24] = WRAPLOW(step1[7] - step1[24]); - output[25] = WRAPLOW(step1[6] - step1[25]); - output[26] = WRAPLOW(step1[5] - step1[26]); - output[27] = WRAPLOW(step1[4] - step1[27]); - output[28] = WRAPLOW(step1[3] - step1[28]); - output[29] = WRAPLOW(step1[2] - step1[29]); - output[30] = WRAPLOW(step1[1] - step1[30]); - output[31] = WRAPLOW(step1[0] - step1[31]); -} - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - - // Rows - for (i = 0; i < 32; ++i) { - int16_t zero_coeff[16]; - for (j = 0; j < 16; ++j) - zero_coeff[j] = input[2 * j] | input[2 * j + 1]; - for (j = 0; j < 8; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 4; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 2; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - - if (zero_coeff[0] | zero_coeff[1]) - idct32_c(input, outptr); - else - memset(outptr, 0, sizeof(tran_low_t) * 32); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_c(temp_in, temp_out); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32] = {0}; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - - // Rows - // only upper-left 16x16 has non-zero coeff - for (i = 0; i < 16; ++i) { - idct32_c(input, outptr); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_c(temp_in, temp_out); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32] = {0}; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - - // Rows - // only upper-left 8x8 has non-zero coeff - for (i = 0; i < 8; ++i) { - idct32_c(input, outptr); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_c(temp_in, temp_out); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - int i, j; - tran_high_t a1; - - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 6); - - for (j = 0; j < 32; ++j) { - for (i = 0; i < 32; ++i) - dest[i] = clip_pixel_add(dest[i], a1); - dest += stride; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, - 0.5 shifts per pixel. */ - int i; - tran_low_t output[16]; - tran_high_t a1, b1, c1, d1, e1; - const tran_low_t *ip = input; - tran_low_t *op = output; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - for (i = 0; i < 4; i++) { - a1 = ip[0] >> UNIT_QUANT_SHIFT; - c1 = ip[1] >> UNIT_QUANT_SHIFT; - d1 = ip[2] >> UNIT_QUANT_SHIFT; - b1 = ip[3] >> UNIT_QUANT_SHIFT; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - op[0] = HIGHBD_WRAPLOW(a1, bd); - op[1] = HIGHBD_WRAPLOW(b1, bd); - op[2] = HIGHBD_WRAPLOW(c1, bd); - op[3] = HIGHBD_WRAPLOW(d1, bd); - ip += 4; - op += 4; - } - - ip = output; - for (i = 0; i < 4; i++) { - a1 = ip[4 * 0]; - c1 = ip[4 * 1]; - d1 = ip[4 * 2]; - b1 = ip[4 * 3]; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], - HIGHBD_WRAPLOW(a1, bd), bd); - dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], - HIGHBD_WRAPLOW(b1, bd), bd); - dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], - HIGHBD_WRAPLOW(c1, bd), bd); - dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], - HIGHBD_WRAPLOW(d1, bd), bd); - - ip++; - dest++; - } -} - -void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, - int dest_stride, int bd) { - int i; - tran_high_t a1, e1; - tran_low_t tmp[4]; - const tran_low_t *ip = in; - tran_low_t *op = tmp; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - (void) bd; - - a1 = ip[0] >> UNIT_QUANT_SHIFT; - e1 = a1 >> 1; - a1 -= e1; - op[0] = HIGHBD_WRAPLOW(a1, bd); - op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); - - ip = tmp; - for (i = 0; i < 4; i++) { - e1 = ip[0] >> 1; - a1 = ip[0] - e1; - dest[dest_stride * 0] = highbd_clip_pixel_add( - dest[dest_stride * 0], a1, bd); - dest[dest_stride * 1] = highbd_clip_pixel_add( - dest[dest_stride * 1], e1, bd); - dest[dest_stride * 2] = highbd_clip_pixel_add( - dest[dest_stride * 2], e1, bd); - dest[dest_stride * 3] = highbd_clip_pixel_add( - dest[dest_stride * 3], e1, bd); - ip++; - dest++; - } -} - -void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step[4]; - tran_high_t temp1, temp2; - (void) bd; - // stage 1 - temp1 = (input[0] + input[2]) * cospi_16_64; - temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; - temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 2 - output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); - output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); - output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); - output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); -} - -void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[4], temp_out[4]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - for (i = 0; i < 4; ++i) { - vpx_highbd_idct4_c(input, outptr, bd); - input += 4; - outptr += 4; - } - - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - vpx_highbd_idct4_c(temp_in, temp_out, bd); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); - } - } -} - -void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, - int dest_stride, int bd) { - int i; - tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 4); - - for (i = 0; i < 4; i++) { - dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); - dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); - dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); - dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); - dest += dest_stride; - } -} - -void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step1[8], step2[8]; - tran_high_t temp1, temp2; - // stage 1 - step1[0] = input[0]; - step1[2] = input[4]; - step1[1] = input[2]; - step1[3] = input[6]; - temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; - temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; - temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 2 & stage 3 - even half - vpx_highbd_idct4_c(step1, step1, bd); - - // stage 2 - odd half - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - // stage 3 - odd half - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - // stage 4 - output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); -} - -void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. - for (i = 0; i < 8; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - - // Then transform columns. - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } -} - -void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - int i, j; - tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 5); - for (j = 0; j < 8; ++j) { - for (i = 0; i < 8; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); - dest += stride; - } -} - -void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - - tran_low_t x0 = input[0]; - tran_low_t x1 = input[1]; - tran_low_t x2 = input[2]; - tran_low_t x3 = input[3]; - (void) bd; - - if (!(x0 | x1 | x2 | x3)) { - memset(output, 0, 4 * sizeof(*output)); - return; - } - - s0 = sinpi_1_9 * x0; - s1 = sinpi_2_9 * x0; - s2 = sinpi_3_9 * x1; - s3 = sinpi_4_9 * x2; - s4 = sinpi_1_9 * x2; - s5 = sinpi_2_9 * x3; - s6 = sinpi_4_9 * x3; - s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd); - - s0 = s0 + s3 + s5; - s1 = s1 - s4 - s6; - s3 = s2; - s2 = sinpi_3_9 * s7; - - // 1-D transform scaling factor is sqrt(2). - // The overall dynamic range is 14b (input) + 14b (multiplication scaling) - // + 1b (addition) = 29b. - // Hence the output bit depth is 15b. - output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd); - output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd); - output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); - output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd); -} - -void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - - tran_low_t x0 = input[7]; - tran_low_t x1 = input[0]; - tran_low_t x2 = input[5]; - tran_low_t x3 = input[2]; - tran_low_t x4 = input[3]; - tran_low_t x5 = input[4]; - tran_low_t x6 = input[1]; - tran_low_t x7 = input[6]; - (void) bd; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { - memset(output, 0, 8 * sizeof(*output)); - return; - } - - // stage 1 - s0 = cospi_2_64 * x0 + cospi_30_64 * x1; - s1 = cospi_30_64 * x0 - cospi_2_64 * x1; - s2 = cospi_10_64 * x2 + cospi_22_64 * x3; - s3 = cospi_22_64 * x2 - cospi_10_64 * x3; - s4 = cospi_18_64 * x4 + cospi_14_64 * x5; - s5 = cospi_14_64 * x4 - cospi_18_64 * x5; - s6 = cospi_26_64 * x6 + cospi_6_64 * x7; - s7 = cospi_6_64 * x6 - cospi_26_64 * x7; - - x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd); - x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd); - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd); - - // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = cospi_8_64 * x4 + cospi_24_64 * x5; - s5 = cospi_24_64 * x4 - cospi_8_64 * x5; - s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; - s7 = cospi_8_64 * x6 + cospi_24_64 * x7; - - x0 = HIGHBD_WRAPLOW(s0 + s2, bd); - x1 = HIGHBD_WRAPLOW(s1 + s3, bd); - x2 = HIGHBD_WRAPLOW(s0 - s2, bd); - x3 = HIGHBD_WRAPLOW(s1 - s3, bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); - - // stage 3 - s2 = cospi_16_64 * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (x6 - x7); - - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); - - output[0] = HIGHBD_WRAPLOW(x0, bd); - output[1] = HIGHBD_WRAPLOW(-x4, bd); - output[2] = HIGHBD_WRAPLOW(x6, bd); - output[3] = HIGHBD_WRAPLOW(-x2, bd); - output[4] = HIGHBD_WRAPLOW(x3, bd); - output[5] = HIGHBD_WRAPLOW(-x7, bd); - output[6] = HIGHBD_WRAPLOW(x5, bd); - output[7] = HIGHBD_WRAPLOW(-x1, bd); -} - -void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. - // Only first 4 row has non-zero coefs. - for (i = 0; i < 4; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - // Then transform columns. - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } -} - -void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step1[16], step2[16]; - tran_high_t temp1, temp2; - (void) bd; - - // stage 1 - step1[0] = input[0/2]; - step1[1] = input[16/2]; - step1[2] = input[8/2]; - step1[3] = input[24/2]; - step1[4] = input[4/2]; - step1[5] = input[20/2]; - step1[6] = input[12/2]; - step1[7] = input[28/2]; - step1[8] = input[2/2]; - step1[9] = input[18/2]; - step1[10] = input[10/2]; - step1[11] = input[26/2]; - step1[12] = input[6/2]; - step1[13] = input[22/2]; - step1[14] = input[14/2]; - step1[15] = input[30/2]; - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); - step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); - step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[11] = step1[11]; - step2[12] = step1[12]; - - // stage 5 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); - step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); - step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); - step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); - step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); - - // stage 6 - step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); - output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); - output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); - output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); - output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); - output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); - output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); - output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); - output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); - output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); - output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); - output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); - output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); - output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); - output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); - output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); -} - -void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. - for (i = 0; i < 16; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - - // Then transform columns. - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; - tran_high_t s9, s10, s11, s12, s13, s14, s15; - - tran_low_t x0 = input[15]; - tran_low_t x1 = input[0]; - tran_low_t x2 = input[13]; - tran_low_t x3 = input[2]; - tran_low_t x4 = input[11]; - tran_low_t x5 = input[4]; - tran_low_t x6 = input[9]; - tran_low_t x7 = input[6]; - tran_low_t x8 = input[7]; - tran_low_t x9 = input[8]; - tran_low_t x10 = input[5]; - tran_low_t x11 = input[10]; - tran_low_t x12 = input[3]; - tran_low_t x13 = input[12]; - tran_low_t x14 = input[1]; - tran_low_t x15 = input[14]; - (void) bd; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 - | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { - memset(output, 0, 16 * sizeof(*output)); - return; - } - - // stage 1 - s0 = x0 * cospi_1_64 + x1 * cospi_31_64; - s1 = x0 * cospi_31_64 - x1 * cospi_1_64; - s2 = x2 * cospi_5_64 + x3 * cospi_27_64; - s3 = x2 * cospi_27_64 - x3 * cospi_5_64; - s4 = x4 * cospi_9_64 + x5 * cospi_23_64; - s5 = x4 * cospi_23_64 - x5 * cospi_9_64; - s6 = x6 * cospi_13_64 + x7 * cospi_19_64; - s7 = x6 * cospi_19_64 - x7 * cospi_13_64; - s8 = x8 * cospi_17_64 + x9 * cospi_15_64; - s9 = x8 * cospi_15_64 - x9 * cospi_17_64; - s10 = x10 * cospi_21_64 + x11 * cospi_11_64; - s11 = x10 * cospi_11_64 - x11 * cospi_21_64; - s12 = x12 * cospi_25_64 + x13 * cospi_7_64; - s13 = x12 * cospi_7_64 - x13 * cospi_25_64; - s14 = x14 * cospi_29_64 + x15 * cospi_3_64; - s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - - x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd); - x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd); - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd); - x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); - x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); - x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd); - x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd); - x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd); - x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd); - - // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4; - s5 = x5; - s6 = x6; - s7 = x7; - s8 = x8 * cospi_4_64 + x9 * cospi_28_64; - s9 = x8 * cospi_28_64 - x9 * cospi_4_64; - s10 = x10 * cospi_20_64 + x11 * cospi_12_64; - s11 = x10 * cospi_12_64 - x11 * cospi_20_64; - s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; - s13 = x12 * cospi_4_64 + x13 * cospi_28_64; - s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; - s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - - x0 = HIGHBD_WRAPLOW(s0 + s4, bd); - x1 = HIGHBD_WRAPLOW(s1 + s5, bd); - x2 = HIGHBD_WRAPLOW(s2 + s6, bd); - x3 = HIGHBD_WRAPLOW(s3 + s7, bd); - x4 = HIGHBD_WRAPLOW(s0 - s4, bd); - x5 = HIGHBD_WRAPLOW(s1 - s5, bd); - x6 = HIGHBD_WRAPLOW(s2 - s6, bd); - x7 = HIGHBD_WRAPLOW(s3 - s7, bd); - x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd); - x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd); - x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd); - x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd); - x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd); - x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd); - - // stage 3 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4 * cospi_8_64 + x5 * cospi_24_64; - s5 = x4 * cospi_24_64 - x5 * cospi_8_64; - s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; - s7 = x6 * cospi_8_64 + x7 * cospi_24_64; - s8 = x8; - s9 = x9; - s10 = x10; - s11 = x11; - s12 = x12 * cospi_8_64 + x13 * cospi_24_64; - s13 = x12 * cospi_24_64 - x13 * cospi_8_64; - s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; - s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - - x0 = HIGHBD_WRAPLOW(s0 + s2, bd); - x1 = HIGHBD_WRAPLOW(s1 + s3, bd); - x2 = HIGHBD_WRAPLOW(s0 - s2, bd); - x3 = HIGHBD_WRAPLOW(s1 - s3, bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); - x8 = HIGHBD_WRAPLOW(s8 + s10, bd); - x9 = HIGHBD_WRAPLOW(s9 + s11, bd); - x10 = HIGHBD_WRAPLOW(s8 - s10, bd); - x11 = HIGHBD_WRAPLOW(s9 - s11, bd); - x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd); - x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd); - - // stage 4 - s2 = (- cospi_16_64) * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (-x6 + x7); - s10 = cospi_16_64 * (x10 + x11); - s11 = cospi_16_64 * (-x10 + x11); - s14 = (- cospi_16_64) * (x14 + x15); - s15 = cospi_16_64 * (x14 - x15); - - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); - x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd); - x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd); - - output[0] = HIGHBD_WRAPLOW(x0, bd); - output[1] = HIGHBD_WRAPLOW(-x8, bd); - output[2] = HIGHBD_WRAPLOW(x12, bd); - output[3] = HIGHBD_WRAPLOW(-x4, bd); - output[4] = HIGHBD_WRAPLOW(x6, bd); - output[5] = HIGHBD_WRAPLOW(x14, bd); - output[6] = HIGHBD_WRAPLOW(x10, bd); - output[7] = HIGHBD_WRAPLOW(x2, bd); - output[8] = HIGHBD_WRAPLOW(x3, bd); - output[9] = HIGHBD_WRAPLOW(x11, bd); - output[10] = HIGHBD_WRAPLOW(x15, bd); - output[11] = HIGHBD_WRAPLOW(x7, bd); - output[12] = HIGHBD_WRAPLOW(x5, bd); - output[13] = HIGHBD_WRAPLOW(-x13, bd); - output[14] = HIGHBD_WRAPLOW(x9, bd); - output[15] = HIGHBD_WRAPLOW(-x1, bd); -} - -void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. Since all non-zero dct coefficients are in - // upper-left 4x4 area, we only need to calculate first 4 rows here. - for (i = 0; i < 4; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - - // Then transform columns. - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - int i, j; - tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 6); - for (j = 0; j < 16; ++j) { - for (i = 0; i < 16; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); - dest += stride; - } -} - -static void highbd_idct32_c(const tran_low_t *input, - tran_low_t *output, int bd) { - tran_low_t step1[32], step2[32]; - tran_high_t temp1, temp2; - (void) bd; - - // stage 1 - step1[0] = input[0]; - step1[1] = input[16]; - step1[2] = input[8]; - step1[3] = input[24]; - step1[4] = input[4]; - step1[5] = input[20]; - step1[6] = input[12]; - step1[7] = input[28]; - step1[8] = input[2]; - step1[9] = input[18]; - step1[10] = input[10]; - step1[11] = input[26]; - step1[12] = input[6]; - step1[13] = input[22]; - step1[14] = input[14]; - step1[15] = input[30]; - - temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; - temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; - temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; - temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; - temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; - temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; - temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; - temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; - temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); - step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); - step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); - step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); - step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); - step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); - step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); - step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); - step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); - step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); - step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); - step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); - step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); - step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); - step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); - step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); - step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); - step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); - - step1[16] = step2[16]; - step1[31] = step2[31]; - temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; - temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; - temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[19] = step2[19]; - step1[20] = step2[20]; - temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; - temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; - temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[27] = step2[27]; - step1[28] = step2[28]; - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[11] = step1[11]; - step2[12] = step1[12]; - - step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); - step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); - step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); - step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); - step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); - step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); - step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); - step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); - - step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); - step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); - step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd); - step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); - step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); - step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); - step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); - step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); - - // stage 5 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); - step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); - step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); - step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); - step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); - - step1[16] = step2[16]; - step1[17] = step2[17]; - temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; - temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; - temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; - temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; - temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[22] = step2[22]; - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[25] = step2[25]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // stage 6 - step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[14] = step1[14]; - step2[15] = step1[15]; - - step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); - step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); - step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); - step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); - step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); - step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd); - step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); - step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd); - - step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); - step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); - step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); - step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); - step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); - step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); - step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); - step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); - - // stage 7 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); - step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); - step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); - step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); - step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); - step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); - step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); - step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); - step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); - step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); - - step1[16] = step2[16]; - step1[17] = step2[17]; - step1[18] = step2[18]; - step1[19] = step2[19]; - temp1 = (-step2[20] + step2[27]) * cospi_16_64; - temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step2[21] + step2[26]) * cospi_16_64; - temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step2[22] + step2[25]) * cospi_16_64; - temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step2[23] + step2[24]) * cospi_16_64; - temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[28] = step2[28]; - step1[29] = step2[29]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // final stage - output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); - output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); - output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); - output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); - output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); - output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); - output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd); - output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); - output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); - output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); - output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); - output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); - output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); - output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); - output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); - output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); - output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); - output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); - output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); - output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); - output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); - output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); - output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); - output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); - output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd); - output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); - output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); - output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); - output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); - output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); - output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); - output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); -} - -void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[32 * 32]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - for (i = 0; i < 32; ++i) { - tran_low_t zero_coeff[16]; - for (j = 0; j < 16; ++j) - zero_coeff[j] = input[2 * j] | input[2 * j + 1]; - for (j = 0; j < 8; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 4; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 2; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - - if (zero_coeff[0] | zero_coeff[1]) - highbd_idct32_c(input, outptr, bd); - else - memset(outptr, 0, sizeof(tran_low_t) * 32); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - highbd_idct32_c(temp_in, temp_out, bd); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[32 * 32] = {0}; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - // Only upper-left 8x8 has non-zero coeff. - for (i = 0; i < 8; ++i) { - highbd_idct32_c(input, outptr, bd); - input += 32; - outptr += 32; - } - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - highbd_idct32_c(temp_in, temp_out, bd); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - int i, j; - int a1; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 6); - - for (j = 0; j < 32; ++j) { - for (i = 0; i < 32; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); - dest += stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/inv_txfm.h b/thirdparty/libvpx/vpx_dsp/inv_txfm.h deleted file mode 100644 index 9cfe1be3a7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/inv_txfm.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_INV_TXFM_H_ -#define VPX_DSP_INV_TXFM_H_ - -#include - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE tran_high_t check_range(tran_high_t input) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - // For valid VP9 input streams, intermediate stage coefficients should always - // stay within the range of a signed 16 bit integer. Coefficients can go out - // of this range for invalid/corrupt VP9 streams. However, strictly checking - // this range for every intermediate coefficient can burdensome for a decoder, - // therefore the following assertion is only enabled when configured with - // --enable-coefficient-range-checking. - assert(INT16_MIN <= input); - assert(input <= INT16_MAX); -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING - return input; -} - -static INLINE tran_high_t dct_const_round_shift(tran_high_t input) { - tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return (tran_high_t)rv; -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE tran_high_t highbd_check_range(tran_high_t input, - int bd) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - // For valid highbitdepth VP9 streams, intermediate stage coefficients will - // stay within the ranges: - // - 8 bit: signed 16 bit integer - // - 10 bit: signed 18 bit integer - // - 12 bit: signed 20 bit integer - const int32_t int_max = (1 << (7 + bd)) - 1; - const int32_t int_min = -int_max - 1; - assert(int_min <= input); - assert(input <= int_max); - (void) int_min; -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING - (void) bd; - return input; -} - -static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) { - tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return (tran_high_t)rv; -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if CONFIG_EMULATE_HARDWARE -// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a -// non-normative method to handle overflows. A stream that causes -// overflows in the inverse transform is considered invalid in VP9, -// and a hardware implementer is free to choose any reasonable -// method to handle overflows. However to aid in hardware -// verification they can use a specific implementation of the -// WRAPLOW() macro below that is identical to their intended -// hardware implementation (and also use configure options to trigger -// the C-implementation of the transform). -// -// The particular WRAPLOW implementation below performs strict -// overflow wrapping to match common hardware implementations. -// bd of 8 uses trans_low with 16bits, need to remove 16bits -// bd of 10 uses trans_low with 18bits, need to remove 14bits -// bd of 12 uses trans_low with 20bits, need to remove 12bits -// bd of x uses trans_low with 8+x bits, need to remove 24-x bits - -#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) -#if CONFIG_VP9_HIGHBITDEPTH -#define HIGHBD_WRAPLOW(x, bd) \ - ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) -#endif // CONFIG_VP9_HIGHBITDEPTH - -#else // CONFIG_EMULATE_HARDWARE - -#define WRAPLOW(x) ((int32_t)check_range(x)) -#if CONFIG_VP9_HIGHBITDEPTH -#define HIGHBD_WRAPLOW(x, bd) \ - ((int32_t)highbd_check_range((x), bd)) -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_EMULATE_HARDWARE - -void idct4_c(const tran_low_t *input, tran_low_t *output); -void idct8_c(const tran_low_t *input, tran_low_t *output); -void idct16_c(const tran_low_t *input, tran_low_t *output); -void idct32_c(const tran_low_t *input, tran_low_t *output); -void iadst4_c(const tran_low_t *input, tran_low_t *output); -void iadst8_c(const tran_low_t *input, tran_low_t *output); -void iadst16_c(const tran_low_t *input, tran_low_t *output); - -#if CONFIG_VP9_HIGHBITDEPTH -void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd); - -void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); - -static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, - int bd) { - trans = HIGHBD_WRAPLOW(trans, bd); - return clip_pixel_highbd(dest + (int)trans, bd); -} -#endif - -static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { - trans = WRAPLOW(trans); - return clip_pixel(dest + (int)trans); -} -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_INV_TXFM_H_ diff --git a/thirdparty/libvpx/vpx_dsp/loopfilter.c b/thirdparty/libvpx/vpx_dsp/loopfilter.c deleted file mode 100644 index 645a1ab95e..0000000000 --- a/thirdparty/libvpx/vpx_dsp/loopfilter.c +++ /dev/null @@ -1,767 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" - -static INLINE int8_t signed_char_clamp(int t) { - return (int8_t)clamp(t, -128, 127); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE int16_t signed_char_clamp_high(int t, int bd) { - switch (bd) { - case 10: - return (int16_t)clamp(t, -128*4, 128*4-1); - case 12: - return (int16_t)clamp(t, -128*16, 128*16-1); - case 8: - default: - return (int16_t)clamp(t, -128, 128-1); - } -} -#endif - -// should we apply any filter at all: 11111111 yes, 00000000 no -static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, - uint8_t p3, uint8_t p2, - uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1, - uint8_t q2, uint8_t q3) { - int8_t mask = 0; - mask |= (abs(p3 - p2) > limit) * -1; - mask |= (abs(p2 - p1) > limit) * -1; - mask |= (abs(p1 - p0) > limit) * -1; - mask |= (abs(q1 - q0) > limit) * -1; - mask |= (abs(q2 - q1) > limit) * -1; - mask |= (abs(q3 - q2) > limit) * -1; - mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - return ~mask; -} - -static INLINE int8_t flat_mask4(uint8_t thresh, - uint8_t p3, uint8_t p2, - uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1, - uint8_t q2, uint8_t q3) { - int8_t mask = 0; - mask |= (abs(p1 - p0) > thresh) * -1; - mask |= (abs(q1 - q0) > thresh) * -1; - mask |= (abs(p2 - p0) > thresh) * -1; - mask |= (abs(q2 - q0) > thresh) * -1; - mask |= (abs(p3 - p0) > thresh) * -1; - mask |= (abs(q3 - q0) > thresh) * -1; - return ~mask; -} - -static INLINE int8_t flat_mask5(uint8_t thresh, - uint8_t p4, uint8_t p3, - uint8_t p2, uint8_t p1, - uint8_t p0, uint8_t q0, - uint8_t q1, uint8_t q2, - uint8_t q3, uint8_t q4) { - int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); - mask |= (abs(p4 - p0) > thresh) * -1; - mask |= (abs(q4 - q0) > thresh) * -1; - return ~mask; -} - -// is there high edge variance internal edge: 11111111 yes, 00000000 no -static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1) { - int8_t hev = 0; - hev |= (abs(p1 - p0) > thresh) * -1; - hev |= (abs(q1 - q0) > thresh) * -1; - return hev; -} - -static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, - uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { - int8_t filter1, filter2; - - const int8_t ps1 = (int8_t) *op1 ^ 0x80; - const int8_t ps0 = (int8_t) *op0 ^ 0x80; - const int8_t qs0 = (int8_t) *oq0 ^ 0x80; - const int8_t qs1 = (int8_t) *oq1 ^ 0x80; - const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); - - // add outer taps if we have high edge variance - int8_t filter = signed_char_clamp(ps1 - qs1) & hev; - - // inner taps - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; - - // save bottom 3 bits so that we round one side +4 and the other +3 - // if it equals 4 we'll set to adjust by -1 to account for the fact - // we'd round 3 the other way - filter1 = signed_char_clamp(filter + 4) >> 3; - filter2 = signed_char_clamp(filter + 3) >> 3; - - *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; - *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; - - // outer tap adjustments - filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; - - *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; - *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; -} - -void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p); - ++s; - } -} - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - filter4(mask, *thresh, s - 2, s - 1, s, s + 1); - s += pitch; - } -} - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); - vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); -} - -static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, - uint8_t *op3, uint8_t *op2, - uint8_t *op1, uint8_t *op0, - uint8_t *oq0, uint8_t *oq1, - uint8_t *oq2, uint8_t *oq3) { - if (flat && mask) { - const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; - const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; - - // 7-tap filter [1, 1, 1, 2, 1, 1, 1] - *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); - *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); - *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); - *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); - *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); - *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); - } else { - filter4(mask, thresh, op1, op0, oq0, oq1); - } -} - -void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p); - ++s; - } -} - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - int i; - - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3); - s += pitch; - } -} - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); - vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); -} - -static INLINE void filter16(int8_t mask, uint8_t thresh, - uint8_t flat, uint8_t flat2, - uint8_t *op7, uint8_t *op6, - uint8_t *op5, uint8_t *op4, - uint8_t *op3, uint8_t *op2, - uint8_t *op1, uint8_t *op0, - uint8_t *oq0, uint8_t *oq1, - uint8_t *oq2, uint8_t *oq3, - uint8_t *oq4, uint8_t *oq5, - uint8_t *oq6, uint8_t *oq7) { - if (flat2 && flat && mask) { - const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4, - p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; - - const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, - q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; - - // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] - *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + - q0, 4); - *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + - q0 + q1, 4); - *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + - q0 + q1 + q2, 4); - *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + - q0 + q1 + q2 + q3, 4); - *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4, 4); - *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + - q0 + q1 + q2 + q3 + q4 + q5, 4); - *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + - q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); - *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + - q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); - *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + - q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); - *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + - q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); - *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + - q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); - *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); - *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + - q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); - *oq6 = ROUND_POWER_OF_TWO(p0 + - q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); - } else { - filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3); - } -} - -static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int count) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat2 = flat_mask5(1, - s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, - q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p]); - - filter16(mask, *thresh, flat, flat2, - s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, - s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p, - s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p); - ++s; - } -} - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); -} - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); -} - -static void mb_lpf_vertical_edge_w(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { - int i; - - for (i = 0; i < count; ++i) { - const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, - q0, s[4], s[5], s[6], s[7]); - - filter16(mask, *thresh, flat, flat2, - s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); - s += p; - } -} - -void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); -} - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16); -} - -#if CONFIG_VP9_HIGHBITDEPTH -// Should we apply any filter at all: 11111111 yes, 00000000 no ? -static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit, - uint16_t p3, uint16_t p2, - uint16_t p1, uint16_t p0, - uint16_t q0, uint16_t q1, - uint16_t q2, uint16_t q3, int bd) { - int8_t mask = 0; - int16_t limit16 = (uint16_t)limit << (bd - 8); - int16_t blimit16 = (uint16_t)blimit << (bd - 8); - mask |= (abs(p3 - p2) > limit16) * -1; - mask |= (abs(p2 - p1) > limit16) * -1; - mask |= (abs(p1 - p0) > limit16) * -1; - mask |= (abs(q1 - q0) > limit16) * -1; - mask |= (abs(q2 - q1) > limit16) * -1; - mask |= (abs(q3 - q2) > limit16) * -1; - mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1; - return ~mask; -} - -static INLINE int8_t highbd_flat_mask4(uint8_t thresh, - uint16_t p3, uint16_t p2, - uint16_t p1, uint16_t p0, - uint16_t q0, uint16_t q1, - uint16_t q2, uint16_t q3, int bd) { - int8_t mask = 0; - int16_t thresh16 = (uint16_t)thresh << (bd - 8); - mask |= (abs(p1 - p0) > thresh16) * -1; - mask |= (abs(q1 - q0) > thresh16) * -1; - mask |= (abs(p2 - p0) > thresh16) * -1; - mask |= (abs(q2 - q0) > thresh16) * -1; - mask |= (abs(p3 - p0) > thresh16) * -1; - mask |= (abs(q3 - q0) > thresh16) * -1; - return ~mask; -} - -static INLINE int8_t highbd_flat_mask5(uint8_t thresh, - uint16_t p4, uint16_t p3, - uint16_t p2, uint16_t p1, - uint16_t p0, uint16_t q0, - uint16_t q1, uint16_t q2, - uint16_t q3, uint16_t q4, int bd) { - int8_t mask = ~highbd_flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3, bd); - int16_t thresh16 = (uint16_t)thresh << (bd - 8); - mask |= (abs(p4 - p0) > thresh16) * -1; - mask |= (abs(q4 - q0) > thresh16) * -1; - return ~mask; -} - -// Is there high edge variance internal edge: -// 11111111_11111111 yes, 00000000_00000000 no ? -static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0, - uint16_t q0, uint16_t q1, int bd) { - int16_t hev = 0; - int16_t thresh16 = (uint16_t)thresh << (bd - 8); - hev |= (abs(p1 - p0) > thresh16) * -1; - hev |= (abs(q1 - q0) > thresh16) * -1; - return hev; -} - -static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, - uint16_t *op0, uint16_t *oq0, uint16_t *oq1, - int bd) { - int16_t filter1, filter2; - // ^0x80 equivalent to subtracting 0x80 from the values to turn them - // into -128 to +127 instead of 0 to 255. - int shift = bd - 8; - const int16_t ps1 = (int16_t)*op1 - (0x80 << shift); - const int16_t ps0 = (int16_t)*op0 - (0x80 << shift); - const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift); - const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift); - const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); - - // Add outer taps if we have high edge variance. - int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev; - - // Inner taps. - filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask; - - // Save bottom 3 bits so that we round one side +4 and the other +3 - // if it equals 4 we'll set to adjust by -1 to account for the fact - // we'd round 3 the other way. - filter1 = signed_char_clamp_high(filter + 4, bd) >> 3; - filter2 = signed_char_clamp_high(filter + 3, bd) >> 3; - - *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift); - *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift); - - // Outer tap adjustments. - filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; - - *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift); - *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); -} - -void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4 * p]; - const uint16_t p2 = s[-3 * p]; - const uint16_t p1 = s[-2 * p]; - const uint16_t p0 = s[-p]; - const uint16_t q0 = s[0 * p]; - const uint16_t q1 = s[1 * p]; - const uint16_t q2 = s[2 * p]; - const uint16_t q3 = s[3 * p]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd); - ++s; - } -} - -void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd); -} - -void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd); - s += pitch; - } -} - -void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, bd); -} - -static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, - uint16_t *op3, uint16_t *op2, - uint16_t *op1, uint16_t *op0, - uint16_t *oq0, uint16_t *oq1, - uint16_t *oq2, uint16_t *oq3, int bd) { - if (flat && mask) { - const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; - const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; - - // 7-tap filter [1, 1, 1, 2, 1, 1, 1] - *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); - *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); - *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); - *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); - *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); - *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); - } else { - highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd); - } -} - -void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - highbd_filter8(mask, *thresh, flat, - s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p, bd); - ++s; - } -} - -void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd); -} - -void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - int i; - - for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - highbd_filter8(mask, *thresh, flat, - s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, - bd); - s += pitch; - } -} - -void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, bd); -} - -static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, - uint8_t flat, uint8_t flat2, - uint16_t *op7, uint16_t *op6, - uint16_t *op5, uint16_t *op4, - uint16_t *op3, uint16_t *op2, - uint16_t *op1, uint16_t *op0, - uint16_t *oq0, uint16_t *oq1, - uint16_t *oq2, uint16_t *oq3, - uint16_t *oq4, uint16_t *oq5, - uint16_t *oq6, uint16_t *oq7, int bd) { - if (flat2 && flat && mask) { - const uint16_t p7 = *op7; - const uint16_t p6 = *op6; - const uint16_t p5 = *op5; - const uint16_t p4 = *op4; - const uint16_t p3 = *op3; - const uint16_t p2 = *op2; - const uint16_t p1 = *op1; - const uint16_t p0 = *op0; - const uint16_t q0 = *oq0; - const uint16_t q1 = *oq1; - const uint16_t q2 = *oq2; - const uint16_t q3 = *oq3; - const uint16_t q4 = *oq4; - const uint16_t q5 = *oq5; - const uint16_t q6 = *oq6; - const uint16_t q7 = *oq7; - - // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] - *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + - q0, 4); - *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + - q0 + q1, 4); - *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + - q0 + q1 + q2, 4); - *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + - q0 + q1 + q2 + q3, 4); - *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4, 4); - *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + - q0 + q1 + q2 + q3 + q4 + q5, 4); - *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + - q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); - *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + - q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); - *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + - q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); - *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + - q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); - *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + - q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); - *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); - *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + - q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); - *oq6 = ROUND_POWER_OF_TWO(p0 + - q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); - } else { - highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3, - bd); - } -} - -static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count, int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { - const uint16_t p3 = s[-4 * p]; - const uint16_t p2 = s[-3 * p]; - const uint16_t p1 = s[-2 * p]; - const uint16_t p0 = s[-p]; - const uint16_t q0 = s[0 * p]; - const uint16_t q1 = s[1 * p]; - const uint16_t q2 = s[2 * p]; - const uint16_t q3 = s[3 * p]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - const int8_t flat2 = highbd_flat_mask5( - 1, s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, - q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p], bd); - - highbd_filter16(mask, *thresh, flat, flat2, - s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, - s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p, - s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p, - bd); - ++s; - } -} - -void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int bd) { - highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); -} - -void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int bd) { - highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); -} - -static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count, int bd) { - int i; - - for (i = 0; i < count; ++i) { - const uint16_t p3 = s[-4]; - const uint16_t p2 = s[-3]; - const uint16_t p1 = s[-2]; - const uint16_t p0 = s[-1]; - const uint16_t q0 = s[0]; - const uint16_t q1 = s[1]; - const uint16_t q2 = s[2]; - const uint16_t q3 = s[3]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - const int8_t flat2 = highbd_flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, - q0, s[4], s[5], s[6], s[7], bd); - - highbd_filter16(mask, *thresh, flat, flat2, - s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, - bd); - s += p; - } -} - -void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); -} - -void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int bd) { - highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/prob.c b/thirdparty/libvpx/vpx_dsp/prob.c deleted file mode 100644 index 639d24dd2f..0000000000 --- a/thirdparty/libvpx/vpx_dsp/prob.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./prob.h" - -const uint8_t vpx_norm[256] = { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static unsigned int tree_merge_probs_impl(unsigned int i, - const vpx_tree_index *tree, - const vpx_prob *pre_probs, - const unsigned int *counts, - vpx_prob *probs) { - const int l = tree[i]; - const unsigned int left_count = (l <= 0) - ? counts[-l] - : tree_merge_probs_impl(l, tree, pre_probs, counts, probs); - const int r = tree[i + 1]; - const unsigned int right_count = (r <= 0) - ? counts[-r] - : tree_merge_probs_impl(r, tree, pre_probs, counts, probs); - const unsigned int ct[2] = { left_count, right_count }; - probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct); - return left_count + right_count; -} - -void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, - const unsigned int *counts, vpx_prob *probs) { - tree_merge_probs_impl(0, tree, pre_probs, counts, probs); -} diff --git a/thirdparty/libvpx/vpx_dsp/prob.h b/thirdparty/libvpx/vpx_dsp/prob.h deleted file mode 100644 index c3cb103ffb..0000000000 --- a/thirdparty/libvpx/vpx_dsp/prob.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_PROB_H_ -#define VPX_DSP_PROB_H_ - -#include "./vpx_config.h" -#include "./vpx_dsp_common.h" - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef uint8_t vpx_prob; - -#define MAX_PROB 255 - -#define vpx_prob_half ((vpx_prob) 128) - -typedef int8_t vpx_tree_index; - -#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2) - -#define vpx_complement(x) (255 - x) - -#define MODE_MV_COUNT_SAT 20 - -/* We build coding trees compactly in arrays. - Each node of the tree is a pair of vpx_tree_indices. - Array index often references a corresponding probability table. - Index <= 0 means done encoding/decoding and value = -Index, - Index > 0 means need another bit, specification at index. - Nonnegative indices are always even; processing begins at node 0. */ - -typedef const vpx_tree_index vpx_tree[]; - -static INLINE vpx_prob clip_prob(int p) { - return (p > 255) ? 255 : (p < 1) ? 1 : p; -} - -static INLINE vpx_prob get_prob(int num, int den) { - return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den); -} - -static INLINE vpx_prob get_binary_prob(int n0, int n1) { - return get_prob(n0, n0 + n1); -} - -/* This function assumes prob1 and prob2 are already within [1,255] range. */ -static INLINE vpx_prob weighted_prob(int prob1, int prob2, int factor) { - return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); -} - -static INLINE vpx_prob merge_probs(vpx_prob pre_prob, - const unsigned int ct[2], - unsigned int count_sat, - unsigned int max_update_factor) { - const vpx_prob prob = get_binary_prob(ct[0], ct[1]); - const unsigned int count = VPXMIN(ct[0] + ct[1], count_sat); - const unsigned int factor = max_update_factor * count / count_sat; - return weighted_prob(pre_prob, prob, factor); -} - -// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT; -static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { - 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, - 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 -}; - -static INLINE vpx_prob mode_mv_merge_probs(vpx_prob pre_prob, - const unsigned int ct[2]) { - const unsigned int den = ct[0] + ct[1]; - if (den == 0) { - return pre_prob; - } else { - const unsigned int count = VPXMIN(den, MODE_MV_COUNT_SAT); - const unsigned int factor = count_to_update_factor[count]; - const vpx_prob prob = - clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den); - return weighted_prob(pre_prob, prob, factor); - } -} - -void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, - const unsigned int *counts, vpx_prob *probs); - - -DECLARE_ALIGNED(16, extern const uint8_t, vpx_norm[256]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_PROB_H_ diff --git a/thirdparty/libvpx/vpx_dsp/txfm_common.h b/thirdparty/libvpx/vpx_dsp/txfm_common.h deleted file mode 100644 index 442e6a57b5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/txfm_common.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_TXFM_COMMON_H_ -#define VPX_DSP_TXFM_COMMON_H_ - -#include "vpx_dsp/vpx_dsp_common.h" - -// Constants and Macros used by all idct/dct functions -#define DCT_CONST_BITS 14 -#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) - -#define UNIT_QUANT_SHIFT 2 -#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) - -// Constants: -// for (int i = 1; i< 32; ++i) -// printf("static const int cospi_%d_64 = %.0f;\n", i, -// round(16384 * cos(i*M_PI/64))); -// Note: sin(k*Pi/64) = cos((32-k)*Pi/64) -static const tran_high_t cospi_1_64 = 16364; -static const tran_high_t cospi_2_64 = 16305; -static const tran_high_t cospi_3_64 = 16207; -static const tran_high_t cospi_4_64 = 16069; -static const tran_high_t cospi_5_64 = 15893; -static const tran_high_t cospi_6_64 = 15679; -static const tran_high_t cospi_7_64 = 15426; -static const tran_high_t cospi_8_64 = 15137; -static const tran_high_t cospi_9_64 = 14811; -static const tran_high_t cospi_10_64 = 14449; -static const tran_high_t cospi_11_64 = 14053; -static const tran_high_t cospi_12_64 = 13623; -static const tran_high_t cospi_13_64 = 13160; -static const tran_high_t cospi_14_64 = 12665; -static const tran_high_t cospi_15_64 = 12140; -static const tran_high_t cospi_16_64 = 11585; -static const tran_high_t cospi_17_64 = 11003; -static const tran_high_t cospi_18_64 = 10394; -static const tran_high_t cospi_19_64 = 9760; -static const tran_high_t cospi_20_64 = 9102; -static const tran_high_t cospi_21_64 = 8423; -static const tran_high_t cospi_22_64 = 7723; -static const tran_high_t cospi_23_64 = 7005; -static const tran_high_t cospi_24_64 = 6270; -static const tran_high_t cospi_25_64 = 5520; -static const tran_high_t cospi_26_64 = 4756; -static const tran_high_t cospi_27_64 = 3981; -static const tran_high_t cospi_28_64 = 3196; -static const tran_high_t cospi_29_64 = 2404; -static const tran_high_t cospi_30_64 = 1606; -static const tran_high_t cospi_31_64 = 804; - -// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 -static const tran_high_t sinpi_1_9 = 5283; -static const tran_high_t sinpi_2_9 = 9929; -static const tran_high_t sinpi_3_9 = 13377; -static const tran_high_t sinpi_4_9 = 15212; - -#endif // VPX_DSP_TXFM_COMMON_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_convolve.c b/thirdparty/libvpx/vpx_dsp/vpx_convolve.c deleted file mode 100644 index 2d1c927cbe..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_convolve.c +++ /dev/null @@ -1,612 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_convolve.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_dsp/vpx_filter.h" -#include "vpx_ports/mem.h" - -static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - int x, y; - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - int x, y; - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = ROUND_POWER_OF_TWO(dst[x] + - clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int x, y; - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int x, y; - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + - clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void convolve(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *const x_filters, - int x0_q4, int x_step_q4, - const InterpKernel *const y_filters, - int y0_q4, int y_step_q4, - int w, int h) { - // Note: Fixed size intermediate buffer, temp, places limits on parameters. - // 2d filtering proceeds in 2 steps: - // (1) Interpolate horizontally into an intermediate buffer, temp. - // (2) Interpolate temp vertically to derive the sub-pixel result. - // Deriving the maximum number of rows in the temp buffer (135): - // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). - // --Largest block size is 64x64 pixels. - // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the - // original frame (in 1/16th pixel units). - // --Must round-up because block may be located at sub-pixel position. - // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. - // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. - uint8_t temp[135 * 64]; - int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= 64); - assert(h <= 64); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, - x_filters, x0_q4, x_step_q4, w, intermediate_height); - convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, - y_filters, y0_q4, y_step_q4, w, h); -} - -static const InterpKernel *get_filter_base(const int16_t *filter) { - // NOTE: This assumes that the filter table is 256-byte aligned. - // TODO(agrange) Modify to make independent of table alignment. - return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); -} - -static int get_filter_offset(const int16_t *f, const InterpKernel *base) { - return (int)((const InterpKernel *)(intptr_t)f - base); -} - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - (void)filter_y; - (void)y_step_q4; - - convolve_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h); -} - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - (void)filter_y; - (void)y_step_q4; - - convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h); -} - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - (void)filter_x; - (void)x_step_q4; - - convolve_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h); -} - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - (void)filter_x; - (void)x_step_q4; - - convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h); -} - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - convolve(src, src_stride, dst, dst_stride, - filters_x, x0_q4, x_step_q4, - filters_y, y0_q4, y_step_q4, w, h); -} - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); - assert(w <= 64); - assert(h <= 64); - - vpx_convolve8_c(src, src_stride, temp, 64, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); -} - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int r; - - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - for (r = h; r > 0; --r) { - memcpy(dst, src, w); - src += src_stride; - dst += dst_stride; - } -} - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) - dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); - - src += src_stride; - dst += dst_stride; - } -} - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, - x_step_q4, filter_y, y_step_q4, w, h); -} - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, - x_step_q4, filter_y, y_step_q4, w, h); -} - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, - int w, int h, int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, - int w, int h, int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = ROUND_POWER_OF_TWO(dst[x] + - clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h, - int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = clip_pixel_highbd( - ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h, - int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + - clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *const x_filters, - int x0_q4, int x_step_q4, - const InterpKernel *const y_filters, - int y0_q4, int y_step_q4, - int w, int h, int bd) { - // Note: Fixed size intermediate buffer, temp, places limits on parameters. - // 2d filtering proceeds in 2 steps: - // (1) Interpolate horizontally into an intermediate buffer, temp. - // (2) Interpolate temp vertically to derive the sub-pixel result. - // Deriving the maximum number of rows in the temp buffer (135): - // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). - // --Largest block size is 64x64 pixels. - // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the - // original frame (in 1/16th pixel units). - // --Must round-up because block may be located at sub-pixel position. - // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. - // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. - uint16_t temp[64 * 135]; - int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= 64); - assert(h <= 64); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, CONVERT_TO_BYTEPTR(temp), 64, - x_filters, x0_q4, x_step_q4, w, - intermediate_height, bd); - highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), - 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, - w, h, bd); -} - - -void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - (void)filter_y; - (void)y_step_q4; - - highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - (void)filter_y; - (void)y_step_q4; - - highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - (void)filter_x; - (void)x_step_q4; - - highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - (void)filter_x; - (void)x_step_q4; - - highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - highbd_convolve(src, src_stride, dst, dst_stride, - filters_x, x0_q4, x_step_q4, - filters_y, y0_q4, y_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - // Fixed size intermediate buffer places limits on parameters. - DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]); - assert(w <= 64); - assert(h <= 64); - - vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, - filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); - vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, - NULL, 0, NULL, 0, w, h, bd); -} - -void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h, int bd) { - int r; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - (void)filter_x; - (void)filter_y; - (void)filter_x_stride; - (void)filter_y_stride; - (void)bd; - - for (r = h; r > 0; --r) { - memcpy(dst, src, w * sizeof(uint16_t)); - src += src_stride; - dst += dst_stride; - } -} - -void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h, int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - (void)filter_x; - (void)filter_y; - (void)filter_x_stride; - (void)filter_y_stride; - (void)bd; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); - } - src += src_stride; - dst += dst_stride; - } -} -#endif diff --git a/thirdparty/libvpx/vpx_dsp/vpx_convolve.h b/thirdparty/libvpx/vpx_dsp/vpx_convolve.h deleted file mode 100644 index 9ed3f1750f..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_convolve.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_DSP_VPX_CONVOLVE_H_ -#define VPX_DSP_VPX_CONVOLVE_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -#if CONFIG_VP9_HIGHBITDEPTH -typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd); -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_VPX_CONVOLVE_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h b/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h deleted file mode 100644 index a1d0a51ef5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_VPX_DSP_COMMON_H_ -#define VPX_DSP_VPX_DSP_COMMON_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) -#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) - -#if CONFIG_VP9_HIGHBITDEPTH -// Note: -// tran_low_t is the datatype used for final transform coefficients. -// tran_high_t is the datatype used for intermediate transform stages. -typedef int64_t tran_high_t; -typedef int32_t tran_low_t; -#else -// Note: -// tran_low_t is the datatype used for final transform coefficients. -// tran_high_t is the datatype used for intermediate transform stages. -typedef int32_t tran_high_t; -typedef int16_t tran_low_t; -#endif // CONFIG_VP9_HIGHBITDEPTH - -static INLINE uint8_t clip_pixel(int val) { - return (val > 255) ? 255 : (val < 0) ? 0 : val; -} - -static INLINE int clamp(int value, int low, int high) { - return value < low ? low : (value > high ? high : value); -} - -static INLINE double fclamp(double value, double low, double high) { - return value < low ? low : (value > high ? high : value); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE uint16_t clip_pixel_highbd(int val, int bd) { - switch (bd) { - case 8: - default: - return (uint16_t)clamp(val, 0, 255); - case 10: - return (uint16_t)clamp(val, 0, 1023); - case 12: - return (uint16_t)clamp(val, 0, 4095); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_VPX_DSP_COMMON_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c b/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c deleted file mode 100644 index 5fe27b614b..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vpx_dsp_rtcd.h" -#include "vpx_ports/vpx_once.h" - -void vpx_dsp_rtcd() { - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vpx_dsp/vpx_filter.h b/thirdparty/libvpx/vpx_dsp/vpx_filter.h deleted file mode 100644 index 2617febf3b..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_filter.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_VPX_FILTER_H_ -#define VPX_DSP_VPX_FILTER_H_ - -#include "vpx/vpx_integer.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -#define FILTER_BITS 7 - -#define SUBPEL_BITS 4 -#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1) -#define SUBPEL_SHIFTS (1 << SUBPEL_BITS) -#define SUBPEL_TAPS 8 - -typedef int16_t InterpKernel[SUBPEL_TAPS]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_VPX_FILTER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/convolve.h b/thirdparty/libvpx/vpx_dsp/x86/convolve.h deleted file mode 100644 index 7e43eb7c72..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/convolve.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_DSP_X86_CONVOLVE_H_ -#define VPX_DSP_X86_CONVOLVE_H_ - -#include - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -typedef void filter8_1dfunction ( - const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter -); - -#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(filter[3] != 128); \ - assert(step_q4 == 16); \ - if (filter[0] | filter[1] | filter[2]) { \ - while (w >= 16) { \ - vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - if (w == 8) { \ - vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } else if (w == 4) { \ - vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } \ - } else { \ - while (w >= 16) { \ - vpx_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - if (w == 8) { \ - vpx_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } else if (w == 4) { \ - vpx_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } \ - } \ -} - -#define FUN_CONV_2D(avg, opt) \ -void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(filter_x[3] != 128); \ - assert(filter_y[3] != 128); \ - assert(w <= 64); \ - assert(h <= 64); \ - assert(x_step_q4 == 16); \ - assert(y_step_q4 == 16); \ - if (filter_x[0] | filter_x[1] | filter_x[2]) { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ - vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 7); \ - vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } else { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ - vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 1); \ - vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } \ -} - -#if CONFIG_VP9_HIGHBITDEPTH - -typedef void highbd_filter8_1dfunction ( - const uint16_t *src_ptr, - const ptrdiff_t src_pitch, - uint16_t *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const int16_t *filter, - int bd -); - -#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ - ptrdiff_t src_stride, \ - uint8_t *dst8, \ - ptrdiff_t dst_stride, \ - const int16_t *filter_x, \ - int x_step_q4, \ - const int16_t *filter_y, \ - int y_step_q4, \ - int w, int h, int bd) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ - if (filter[0] | filter[1] | filter[2]) { \ - while (w >= 16) { \ - vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h, bd); \ - } \ -} - -#define HIGH_FUN_CONV_2D(avg, opt) \ -void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h, int bd) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ - vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 7, bd); \ - vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ - 64, dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ - vpx_highbd_convolve8_horiz_##opt(src, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 1, bd); \ - vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ - dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } \ - } else { \ - vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, \ - h, bd); \ - } \ -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#endif // VPX_DSP_X86_CONVOLVE_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm deleted file mode 100644 index cd6a6ae982..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm +++ /dev/null @@ -1,860 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pb_1: times 16 db 1 -pw_4: times 8 dw 4 -pw_8: times 8 dw 8 -pw_16: times 8 dw 16 -pw_32: times 8 dw 32 -dc_128: times 16 db 128 -pw2_4: times 8 dw 2 -pw2_8: times 8 dw 4 -pw2_16: times 8 dw 8 -pw2_32: times 8 dw 16 - -SECTION .text - -; ------------------------------------------ -; input: x, y, z, result -; -; trick from pascal -; (x+2y+z+2)>>2 can be calculated as: -; result = avg(x,z) -; result -= xor(x,z) & 1 -; result = avg(result,y) -; ------------------------------------------ -%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 - pavgb %4, %1, %3 - pxor %3, %1 - pand %3, [GLOBAL(pb_1)] - psubb %4, %3 - pavgb %4, %2 -%endmacro - -INIT_XMM sse2 -cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset - GET_GOT goffsetq - - movq m0, [aboveq] - DEFINE_ARGS dst, stride, temp - psrldq m1, m0, 1 - psrldq m2, m0, 2 - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 - - ; store 4 lines - movd [dstq ], m3 - psrlq m3, 8 - movd [dstq+strideq ], m3 - lea dstq, [dstq+strideq*2] - psrlq m3, 8 - movd [dstq ], m3 - psrlq m3, 8 - movd [dstq+strideq ], m3 - psrlq m0, 56 - movd tempq, m0 - mov [dstq+strideq+3], tempb - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset - GET_GOT goffsetq - - movu m1, [aboveq] - pslldq m0, m1, 1 - psrldq m2, m1, 1 - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 - punpckhbw m0, m0 ; 7 7 - punpcklwd m0, m0 ; 7 7 7 7 - punpckldq m0, m0 ; 7 7 7 7 7 7 7 7 - punpcklqdq m3, m0 ; -1 0 1 2 3 4 5 6 7 7 7 7 7 7 7 7 - - ; store 4 lines - psrldq m3, 1 - movq [dstq ], m3 - psrldq m3, 1 - movq [dstq+strideq ], m3 - psrldq m3, 1 - movq [dstq+strideq*2], m3 - psrldq m3, 1 - movq [dstq+stride3q ], m3 - lea dstq, [dstq+strideq*4] - - ; store next 4 lines - psrldq m3, 1 - movq [dstq ], m3 - psrldq m3, 1 - movq [dstq+strideq ], m3 - psrldq m3, 1 - movq [dstq+strideq*2], m3 - psrldq m3, 1 - movq [dstq+stride3q ], m3 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal d207_predictor_4x4, 4, 4, 5, dst, stride, unused, left, goffset - GET_GOT goffsetq - - movd m0, [leftq] ; abcd [byte] - punpcklbw m4, m0, m0 ; aabb ccdd - punpcklwd m4, m4 ; aaaa bbbb cccc dddd - psrldq m4, 12 ; dddd - punpckldq m0, m4 ; abcd dddd - psrldq m1, m0, 1 ; bcdd - psrldq m2, m0, 2 ; cddd - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; a2bc b2cd c3d d - pavgb m1, m0 ; ab, bc, cd, d [byte] - - punpcklbw m1, m3 ; ab, a2bc, bc, b2cd, cd, c3d, d, d - movd [dstq ], m1 - psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d - movd [dstq+strideq], m1 - - lea dstq, [dstq+strideq*2] - psrlq m1, 16 ; cd, c3d, d, d - movd [dstq ], m1 - movd [dstq+strideq], m4 ; d, d, d, d - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - movd m2, [leftq] - movd m0, [aboveq] - pxor m1, m1 - punpckldq m0, m2 - psadbw m0, m1 - paddw m0, [GLOBAL(pw_4)] - psraw m0, 3 - pshuflw m0, m0, 0x0 - packuswb m0, m0 - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset - movifnidn leftq, leftmp - GET_GOT goffsetq - - pxor m1, m1 - movd m0, [leftq] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_4)] - psraw m0, 2 - pshuflw m0, m0, 0x0 - packuswb m0, m0 - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - movd m0, [aboveq] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_4)] - psraw m0, 2 - pshuflw m0, m0, 0x0 - packuswb m0, m0 - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - movq m0, [aboveq] - movq m2, [leftq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - paddw m0, [GLOBAL(pw_8)] - psraw m0, 4 - punpcklbw m0, m0 - pshuflw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - movq m0, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_8)] - psraw m0, 3 - punpcklbw m0, m0 - pshuflw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset - movifnidn leftq, leftmp - GET_GOT goffsetq - - pxor m1, m1 - movq m0, [leftq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_8)] - psraw m0, 3 - punpcklbw m0, m0 - pshuflw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - movd m0, [GLOBAL(dc_128)] - movd [dstq ], m0 - movd [dstq+strideq ], m0 - movd [dstq+strideq*2], m0 - movd [dstq+stride3q ], m0 - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - movq m0, [GLOBAL(dc_128)] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [leftq] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw_16)] - psraw m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - - -INIT_XMM sse2 -cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - psadbw m0, m1 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_16)] - psraw m0, 4 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [leftq] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - psadbw m0, m1 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_16)] - psraw m0, 4 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - mova m0, [GLOBAL(dc_128)] -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - RESTORE_GOT - RET - - -INIT_XMM sse2 -cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [aboveq+16] - mova m3, [leftq] - mova m4, [leftq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - psadbw m0, m1 - psadbw m2, m1 - psadbw m3, m1 - psadbw m4, m1 - paddw m0, m2 - paddw m0, m3 - paddw m0, m4 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw_32)] - psraw m0, 6 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_32)] - psraw m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [leftq] - mova m2, [leftq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_32)] - psraw m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - mova m0, [GLOBAL(dc_128)] -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above - movd m0, [aboveq] - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - RET - -INIT_XMM sse2 -cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above - movq m0, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - RET - -INIT_XMM sse2 -cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, nlines4 - lea stride3q, [strideq*3] - mov nlines4d, 4 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec nlines4d - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above - mova m0, [aboveq] - mova m1, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, nlines4 - lea stride3q, [strideq*3] - mov nlines4d, 8 -.loop: - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m1 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m1 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m1 - lea dstq, [dstq+strideq*4] - dec nlines4d - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left - movifnidn leftq, leftmp - movd m0, [leftq] - punpcklbw m0, m0 - punpcklbw m0, m0 - pshufd m1, m0, 0x1 - movd [dstq ], m0 - movd [dstq+strideq], m1 - pshufd m2, m0, 0x2 - lea dstq, [dstq+strideq*2] - pshufd m3, m0, 0x3 - movd [dstq ], m2 - movd [dstq+strideq], m3 - RET - -INIT_XMM sse2 -cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left - movifnidn leftq, leftmp - mov lineq, -2 - DEFINE_ARGS dst, stride, line, left, stride3 - lea stride3q, [strideq*3] - movq m0, [leftq ] - punpcklbw m0, m0 ; l1 l1 l2 l2 ... l8 l8 -.loop: - pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1 - pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2 - movq [dstq ], m1 - movq [dstq+strideq], m2 - pshuflw m1, m0, 0xaa - pshuflw m2, m0, 0xff - movq [dstq+strideq*2], m1 - movq [dstq+stride3q ], m2 - pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8 - inc lineq - lea dstq, [dstq+strideq*4] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left - movifnidn leftq, leftmp - mov lineq, -4 - DEFINE_ARGS dst, stride, line, left, stride3 - lea stride3q, [strideq*3] -.loop: - movd m0, [leftq] - punpcklbw m0, m0 - punpcklbw m0, m0 ; l1 to l4 each repeated 4 times - pshufd m1, m0, 0x0 ; l1 repeated 16 times - pshufd m2, m0, 0x55 ; l2 repeated 16 times - mova [dstq ], m1 - mova [dstq+strideq ], m2 - pshufd m1, m0, 0xaa - pshufd m2, m0, 0xff - mova [dstq+strideq*2], m1 - mova [dstq+stride3q ], m2 - inc lineq - lea leftq, [leftq+4 ] - lea dstq, [dstq+strideq*4] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left - movifnidn leftq, leftmp - mov lineq, -8 - DEFINE_ARGS dst, stride, line, left, stride3 - lea stride3q, [strideq*3] -.loop: - movd m0, [leftq] - punpcklbw m0, m0 - punpcklbw m0, m0 ; l1 to l4 each repeated 4 times - pshufd m1, m0, 0x0 ; l1 repeated 16 times - pshufd m2, m0, 0x55 ; l2 repeated 16 times - mova [dstq ], m1 - mova [dstq+16 ], m1 - mova [dstq+strideq ], m2 - mova [dstq+strideq+16 ], m2 - pshufd m1, m0, 0xaa - pshufd m2, m0, 0xff - mova [dstq+strideq*2 ], m1 - mova [dstq+strideq*2+16], m1 - mova [dstq+stride3q ], m2 - mova [dstq+stride3q+16 ], m2 - inc lineq - lea leftq, [leftq+4 ] - lea dstq, [dstq+strideq*4] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left - pxor m1, m1 - movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x - punpcklbw m0, m1 - pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word] - psrldq m0, 2 - psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl [word] - movd m2, [leftq] - punpcklbw m2, m1 - pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] - pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] - paddw m4, m0 - paddw m3, m0 - packuswb m4, m4 - packuswb m3, m3 - movd [dstq ], m4 - movd [dstq+strideq], m3 - lea dstq, [dstq+strideq*2] - pshuflw m4, m2, 0xaa - pshuflw m3, m2, 0xff - paddw m4, m0 - paddw m3, m0 - packuswb m4, m4 - packuswb m3, m3 - movd [dstq ], m4 - movd [dstq+strideq], m3 - RET - -INIT_XMM sse2 -cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left - pxor m1, m1 - movd m2, [aboveq-1] - movq m0, [aboveq] - punpcklbw m2, m1 - punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word] - pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word] - DEFINE_ARGS dst, stride, line, left - mov lineq, -4 - punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word] - psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word] - movq m2, [leftq] - punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word] -.loop - pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] - pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] - punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word] - punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word] - paddw m4, m0 - paddw m3, m0 - packuswb m4, m3 - movq [dstq ], m4 - movhps [dstq+strideq], m4 - lea dstq, [dstq+strideq*2] - psrldq m2, 4 - inc lineq - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left - pxor m1, m1 - mova m2, [aboveq-16]; - mova m0, [aboveq] ; t1 t2 ... t16 [byte] - punpckhbw m2, m1 ; [127:112] tl [word] - punpckhbw m4, m0, m1 - punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word] - DEFINE_ARGS dst, stride, line, left, stride8 - mov lineq, -8 - pshufhw m2, m2, 0xff - mova m3, [leftq] ; l1 l2 ... l16 [byte] - punpckhqdq m2, m2 ; tl repeated 8 times [word] - psubw m0, m2 - psubw m4, m2 ; m0:m4 t1-tl t2-tl ... t16-tl [word] - punpckhbw m5, m3, m1 - punpcklbw m3, m1 ; m3:m5 l1 l2 ... l16 [word] - lea stride8q, [strideq*8] -.loop: - pshuflw m6, m3, 0x0 - pshuflw m7, m5, 0x0 - punpcklqdq m6, m6 ; l1 repeated 8 times [word] - punpcklqdq m7, m7 ; l8 repeated 8 times [word] - paddw m1, m6, m0 - paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,15] [word] - psrldq m5, 2 - packuswb m1, m6 - mova [dstq ], m1 - paddw m1, m7, m0 - paddw m7, m4 ; m1:m7 ti-tl+l8 [i=1,15] [word] - psrldq m3, 2 - packuswb m1, m7 - mova [dstq+stride8q], m1 - inc lineq - lea dstq, [dstq+strideq] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left - pxor m1, m1 - movd m2, [aboveq-1] - mova m0, [aboveq] - mova m4, [aboveq+16] - punpcklbw m2, m1 - punpckhbw m3, m0, m1 - punpckhbw m5, m4, m1 - punpcklbw m0, m1 - punpcklbw m4, m1 - pshuflw m2, m2, 0x0 - DEFINE_ARGS dst, stride, line, left - mov lineq, -16 - punpcklqdq m2, m2 - add leftq, 32 - psubw m0, m2 - psubw m3, m2 - psubw m4, m2 - psubw m5, m2 -.loop: - movd m2, [leftq+lineq*2] - pxor m1, m1 - punpcklbw m2, m1 - pshuflw m7, m2, 0x55 - pshuflw m2, m2, 0x0 - punpcklqdq m2, m2 - punpcklqdq m7, m7 - paddw m6, m2, m3 - paddw m1, m2, m0 - packuswb m1, m6 - mova [dstq ], m1 - paddw m6, m2, m5 - paddw m1, m2, m4 - packuswb m1, m6 - mova [dstq+16 ], m1 - paddw m6, m7, m3 - paddw m1, m7, m0 - packuswb m1, m6 - mova [dstq+strideq ], m1 - paddw m6, m7, m5 - paddw m1, m7, m4 - packuswb m1, m6 - mova [dstq+strideq+16], m1 - lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm deleted file mode 100644 index 5e0139fa8d..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm +++ /dev/null @@ -1,871 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA - -pb_1: times 16 db 1 -sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 -sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 -sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15 -sh_b32104567: db 3, 2, 1, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b8091a2b345: db 8, 0, 9, 1, 10, 2, 11, 3, 4, 5, 0, 0, 0, 0, 0, 0 -sh_b76543210: db 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 -sh_bfedcba9876543210: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - -SECTION .text - -INIT_XMM ssse3 -cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, dst8, line - lea stride3q, [strideq*3] - lea dst8q, [dstq+strideq*8] - mova m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] - pavgb m3, m2, m0 - pxor m2, m0 - pshufb m0, m1 - pand m2, [GLOBAL(pb_1)] - psubb m3, m2 - pavgb m0, m3 - - ; first 4 lines and first half of 3rd 4 lines - mov lined, 2 -.loop: - mova [dstq ], m0 - movhps [dst8q ], m0 - pshufb m0, m1 - mova [dstq +strideq ], m0 - movhps [dst8q+strideq ], m0 - pshufb m0, m1 - mova [dstq +strideq*2 ], m0 - movhps [dst8q+strideq*2 ], m0 - pshufb m0, m1 - mova [dstq +stride3q ], m0 - movhps [dst8q+stride3q ], m0 - pshufb m0, m1 - lea dstq, [dstq +strideq*4] - lea dst8q, [dst8q+strideq*4] - dec lined - jnz .loop - - ; bottom-right 8x8 block - movhps [dstq +8], m0 - movhps [dstq+strideq +8], m0 - movhps [dstq+strideq*2+8], m0 - movhps [dstq+stride3q +8], m0 - lea dstq, [dstq+strideq*4] - movhps [dstq +8], m0 - movhps [dstq+strideq +8], m0 - movhps [dstq+strideq*2+8], m0 - movhps [dstq+stride3q +8], m0 - - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - mova m4, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, dst16, line - lea stride3q, [strideq*3] - lea dst16q, [dstq +strideq*8] - lea dst16q, [dst16q+strideq*8] - mova m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)] - pavgb m3, m2, m4 - pxor m2, m4 - palignr m5, m4, m0, 1 - palignr m6, m4, m0, 2 - pshufb m4, m1 - pand m2, [GLOBAL(pb_1)] - psubb m3, m2 - pavgb m4, m3 - pavgb m3, m0, m6 - pxor m0, m6 - pand m0, [GLOBAL(pb_1)] - psubb m3, m0 - pavgb m5, m3 - - ; write 4x4 lines (and the first half of the second 4x4 lines) - mov lined, 4 -.loop: - mova [dstq ], m5 - mova [dstq +16], m4 - mova [dst16q ], m4 - palignr m3, m4, m5, 1 - pshufb m4, m1 - mova [dstq +strideq ], m3 - mova [dstq +strideq +16], m4 - mova [dst16q+strideq ], m4 - palignr m5, m4, m3, 1 - pshufb m4, m1 - mova [dstq +strideq*2 ], m5 - mova [dstq +strideq*2+16], m4 - mova [dst16q+strideq*2 ], m4 - palignr m3, m4, m5, 1 - pshufb m4, m1 - mova [dstq +stride3q ], m3 - mova [dstq +stride3q +16], m4 - mova [dst16q+stride3q ], m4 - palignr m5, m4, m3, 1 - pshufb m4, m1 - lea dstq, [dstq +strideq*4] - lea dst16q, [dst16q+strideq*4] - dec lined - jnz .loop - - ; write second half of second 4x4 lines - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - lea dstq, [dstq +strideq*4] - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - lea dstq, [dstq +strideq*4] - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - lea dstq, [dstq +strideq*4] - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - - RESTORE_GOT - RET - -; ------------------------------------------ -; input: x, y, z, result -; -; trick from pascal -; (x+2y+z+2)>>2 can be calculated as: -; result = avg(x,z) -; result -= xor(x,z) & 1 -; result = avg(result,y) -; ------------------------------------------ -%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 - pavgb %4, %1, %3 - pxor %3, %1 - pand %3, [GLOBAL(pb_1)] - psubb %4, %3 - pavgb %4, %2 -%endmacro - -INIT_XMM ssse3 -cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset - GET_GOT goffsetq - - movq m3, [aboveq] - pshufb m1, m3, [GLOBAL(sh_b23456777)] - pshufb m2, m3, [GLOBAL(sh_b12345677)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4 - pavgb m3, m2 - - ; store 4 lines - movd [dstq ], m3 - movd [dstq+strideq], m4 - lea dstq, [dstq+strideq*2] - psrldq m3, 1 - psrldq m4, 1 - movd [dstq ], m3 - movd [dstq+strideq], m4 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset - GET_GOT goffsetq - - movq m3, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] - pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] - pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] - pshufb m3, [GLOBAL(sh_b0123456777777777)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4 - pavgb m3, m2 - - ; store 4 lines - movq [dstq ], m3 - movq [dstq+strideq], m4 - psrldq m3, 1 - psrldq m4, 1 - movq [dstq+strideq*2], m3 - movq [dstq+stride3q ], m4 - lea dstq, [dstq+strideq*4] - psrldq m3, 1 - psrldq m4, 1 - - ; store 4 lines - movq [dstq ], m3 - movq [dstq+strideq], m4 - psrldq m3, 1 - psrldq m4, 1 - movq [dstq+strideq*2], m3 - movq [dstq+stride3q ], m4 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, line - lea stride3q, [strideq*3] - mova m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] - pshufb m3, m0, m1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4 - pavgb m0, m3 - - mov lined, 4 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m4 - pshufb m0, m1 - pshufb m4, m1 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m4 - pshufb m0, m1 - pshufb m4, m1 - lea dstq, [dstq+strideq*4] - dec lined - jnz .loop - RESTORE_GOT - REP_RET - -INIT_XMM ssse3 -cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - mova m7, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, line - mova m1, [GLOBAL(sh_b123456789abcdeff)] - lea stride3q, [strideq*3] - pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)] - pshufb m3, m7, m1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4 - palignr m6, m7, m0, 1 - palignr m5, m7, m0, 2 - pavgb m7, m3 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2 - pavgb m0, m6 - - mov lined, 8 -.loop: - mova [dstq ], m0 - mova [dstq +16], m7 - mova [dstq+strideq ], m2 - mova [dstq+strideq +16], m4 - palignr m3, m7, m0, 1 - palignr m5, m4, m2, 1 - pshufb m7, m1 - pshufb m4, m1 - - mova [dstq+strideq*2 ], m3 - mova [dstq+strideq*2+16], m7 - mova [dstq+stride3q ], m5 - mova [dstq+stride3q +16], m4 - palignr m0, m7, m3, 1 - palignr m2, m4, m5, 1 - pshufb m7, m1 - pshufb m4, m1 - lea dstq, [dstq+strideq*4] - dec lined - jnz .loop - RESTORE_GOT - REP_RET - -INIT_XMM ssse3 -cglobal d153_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset - GET_GOT goffsetq - movd m0, [leftq] ; l1, l2, l3, l4 - movd m1, [aboveq-1] ; tl, t1, t2, t3 - punpckldq m0, m1 ; l1, l2, l3, l4, tl, t1, t2, t3 - pshufb m0, [GLOBAL(sh_b32104567)]; l4, l3, l2, l1, tl, t1, t2, t3 - psrldq m1, m0, 1 ; l3, l2, l1, tl, t1, t2, t3 - psrldq m2, m0, 2 ; l2, l1, tl, t1, t2, t3 - ; comments below are for a predictor like this - ; A1 B1 C1 D1 - ; A2 B2 A1 B1 - ; A3 B3 A2 B2 - ; A4 B4 A3 B3 - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; 3-tap avg B4 B3 B2 B1 C1 D1 - pavgb m1, m0 ; 2-tap avg A4 A3 A2 A1 - - punpcklqdq m3, m1 ; B4 B3 B2 B1 C1 D1 x x A4 A3 A2 A1 .. - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - pshufb m3, [GLOBAL(sh_b8091a2b345)] ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 .. - movd [dstq+stride3q ], m3 - psrldq m3, 2 ; A3 B3 A2 B2 A1 B1 C1 D1 .. - movd [dstq+strideq*2], m3 - psrldq m3, 2 ; A2 B2 A1 B1 C1 D1 .. - movd [dstq+strideq ], m3 - psrldq m3, 2 ; A1 B1 C1 D1 .. - movd [dstq ], m3 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d153_predictor_8x8, 4, 5, 8, dst, stride, above, left, goffset - GET_GOT goffsetq - movq m0, [leftq] ; [0- 7] l1-8 [byte] - movhps m0, [aboveq-1] ; [8-15] tl, t1-7 [byte] - pshufb m1, m0, [GLOBAL(sh_b76543210)] ; l8-1 [word] - pshufb m2, m0, [GLOBAL(sh_b65432108)] ; l7-1,tl [word] - pshufb m3, m0, [GLOBAL(sh_b54321089)] ; l6-1,tl,t1 [word] - pshufb m0, [GLOBAL(sh_b89abcdef)] ; tl,t1-7 [word] - psrldq m4, m0, 1 ; t1-7 [word] - psrldq m5, m0, 2 ; t2-7 [word] - ; comments below are for a predictor like this - ; A1 B1 C1 D1 E1 F1 G1 H1 - ; A2 B2 A1 B1 C1 D1 E1 F1 - ; A3 B3 A2 B2 A1 B1 C1 D1 - ; A4 B4 A3 B3 A2 B2 A1 B1 - ; A5 B5 A4 B4 A3 B3 A2 B2 - ; A6 B6 A5 B5 A4 B4 A3 B3 - ; A7 B7 A6 B6 A5 B5 A4 B4 - ; A8 B8 A7 B7 A6 B6 A5 B5 - pavgb m6, m1, m2 ; 2-tap avg A8-A1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m4, m5, m7 ; 3-tap avg C-H1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m2, m3, m0 ; 3-tap avg B8-1 - - punpcklbw m6, m0 ; A-B8, A-B7 ... A-B2, A-B1 - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - - movhps [dstq+stride3q], m6 ; A-B4, A-B3, A-B2, A-B1 - palignr m0, m7, m6, 10 ; A-B3, A-B2, A-B1, C-H1 - movq [dstq+strideq*2], m0 - psrldq m0, 2 ; A-B2, A-B1, C-H1 - movq [dstq+strideq ], m0 - psrldq m0, 2 ; A-H1 - movq [dstq ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq+stride3q ], m6 ; A-B8, A-B7, A-B6, A-B5 - psrldq m6, 2 ; A-B7, A-B6, A-B5, A-B4 - movq [dstq+strideq*2], m6 - psrldq m6, 2 ; A-B6, A-B5, A-B4, A-B3 - movq [dstq+strideq ], m6 - psrldq m6, 2 ; A-B5, A-B4, A-B3, A-B2 - movq [dstq ], m6 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d153_predictor_16x16, 4, 5, 8, dst, stride, above, left, goffset - GET_GOT goffsetq - mova m0, [leftq] - movu m7, [aboveq-1] - ; comments below are for a predictor like this - ; A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 O1 P1 - ; A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 - ; A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 - ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 - ; A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 - ; A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 - ; A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 - ; A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 - ; A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 - ; Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 - ; Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 - ; Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 - ; Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 - ; Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 - ; Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 - ; Ag Bg Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 - pshufb m6, m7, [GLOBAL(sh_bfedcba9876543210)] - palignr m5, m0, m6, 15 - palignr m3, m0, m6, 14 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg - pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] - pavgb m5, m0 ; A1 - Ag - - punpcklbw m0, m4, m5 ; A-B8 ... A-B1 - punpckhbw m4, m5 ; A-B9 ... A-Bg - - pshufb m3, m7, [GLOBAL(sh_b123456789abcdeff)] - pshufb m5, m7, [GLOBAL(sh_b23456789abcdefff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg C1-P1 - - pshufb m6, m0, [GLOBAL(sh_bfedcba9876543210)] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - palignr m2, m1, m6, 14 - mova [dstq ], m2 - palignr m2, m1, m6, 12 - mova [dstq+strideq ], m2 - palignr m2, m1, m6, 10 - mova [dstq+strideq*2], m2 - palignr m2, m1, m6, 8 - mova [dstq+stride3q ], m2 - lea dstq, [dstq+strideq*4] - palignr m2, m1, m6, 6 - mova [dstq ], m2 - palignr m2, m1, m6, 4 - mova [dstq+strideq ], m2 - palignr m2, m1, m6, 2 - mova [dstq+strideq*2], m2 - pshufb m4, [GLOBAL(sh_bfedcba9876543210)] - mova [dstq+stride3q ], m6 - lea dstq, [dstq+strideq*4] - - palignr m2, m6, m4, 14 - mova [dstq ], m2 - palignr m2, m6, m4, 12 - mova [dstq+strideq ], m2 - palignr m2, m6, m4, 10 - mova [dstq+strideq*2], m2 - palignr m2, m6, m4, 8 - mova [dstq+stride3q ], m2 - lea dstq, [dstq+strideq*4] - palignr m2, m6, m4, 6 - mova [dstq ], m2 - palignr m2, m6, m4, 4 - mova [dstq+strideq ], m2 - palignr m2, m6, m4, 2 - mova [dstq+strideq*2], m2 - mova [dstq+stride3q ], m4 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d153_predictor_32x32, 4, 5, 8, dst, stride, above, left, goffset - GET_GOT goffsetq - mova m0, [leftq] - movu m7, [aboveq-1] - movu m1, [aboveq+15] - - pshufb m4, m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m6, m1, [GLOBAL(sh_b23456789abcdefff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m4, m6, m2 ; 3-tap avg above [high] - - palignr m3, m1, m7, 1 - palignr m5, m1, m7, 2 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg above [low] - - pshufb m7, [GLOBAL(sh_bfedcba9876543210)] - palignr m5, m0, m7, 15 - palignr m3, m0, m7, 14 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg - pavgb m5, m0 ; A1 - Ag - punpcklbw m6, m4, m5 ; A-B8 ... A-B1 - punpckhbw m4, m5 ; A-B9 ... A-Bg - pshufb m6, [GLOBAL(sh_bfedcba9876543210)] - pshufb m4, [GLOBAL(sh_bfedcba9876543210)] - - DEFINE_ARGS dst, stride, stride3, left, line - lea stride3q, [strideq*3] - - palignr m5, m2, m1, 14 - palignr m7, m1, m6, 14 - mova [dstq ], m7 - mova [dstq+16 ], m5 - palignr m5, m2, m1, 12 - palignr m7, m1, m6, 12 - mova [dstq+strideq ], m7 - mova [dstq+strideq+16 ], m5 - palignr m5, m2, m1, 10 - palignr m7, m1, m6, 10 - mova [dstq+strideq*2 ], m7 - mova [dstq+strideq*2+16], m5 - palignr m5, m2, m1, 8 - palignr m7, m1, m6, 8 - mova [dstq+stride3q ], m7 - mova [dstq+stride3q+16 ], m5 - lea dstq, [dstq+strideq*4] - palignr m5, m2, m1, 6 - palignr m7, m1, m6, 6 - mova [dstq ], m7 - mova [dstq+16 ], m5 - palignr m5, m2, m1, 4 - palignr m7, m1, m6, 4 - mova [dstq+strideq ], m7 - mova [dstq+strideq+16 ], m5 - palignr m5, m2, m1, 2 - palignr m7, m1, m6, 2 - mova [dstq+strideq*2 ], m7 - mova [dstq+strideq*2+16], m5 - mova [dstq+stride3q ], m6 - mova [dstq+stride3q+16 ], m1 - lea dstq, [dstq+strideq*4] - - palignr m5, m1, m6, 14 - palignr m3, m6, m4, 14 - mova [dstq ], m3 - mova [dstq+16 ], m5 - palignr m5, m1, m6, 12 - palignr m3, m6, m4, 12 - mova [dstq+strideq ], m3 - mova [dstq+strideq+16 ], m5 - palignr m5, m1, m6, 10 - palignr m3, m6, m4, 10 - mova [dstq+strideq*2 ], m3 - mova [dstq+strideq*2+16], m5 - palignr m5, m1, m6, 8 - palignr m3, m6, m4, 8 - mova [dstq+stride3q ], m3 - mova [dstq+stride3q+16 ], m5 - lea dstq, [dstq+strideq*4] - palignr m5, m1, m6, 6 - palignr m3, m6, m4, 6 - mova [dstq ], m3 - mova [dstq+16 ], m5 - palignr m5, m1, m6, 4 - palignr m3, m6, m4, 4 - mova [dstq+strideq ], m3 - mova [dstq+strideq+16 ], m5 - palignr m5, m1, m6, 2 - palignr m3, m6, m4, 2 - mova [dstq+strideq*2 ], m3 - mova [dstq+strideq*2+16], m5 - mova [dstq+stride3q ], m4 - mova [dstq+stride3q+16 ], m6 - lea dstq, [dstq+strideq*4] - - mova m7, [leftq] - mova m3, [leftq+16] - palignr m5, m3, m7, 15 - palignr m0, m3, m7, 14 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m5, m0, m2 ; 3-tap avg Bh - - pavgb m5, m3 ; Ah - - punpcklbw m3, m2, m5 ; A-B8 ... A-B1 - punpckhbw m2, m5 ; A-B9 ... A-Bg - pshufb m3, [GLOBAL(sh_bfedcba9876543210)] - pshufb m2, [GLOBAL(sh_bfedcba9876543210)] - - palignr m7, m6, m4, 14 - palignr m0, m4, m3, 14 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m6, m4, 12 - palignr m0, m4, m3, 12 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m6, m4, 10 - palignr m0, m4, m3, 10 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - palignr m7, m6, m4, 8 - palignr m0, m4, m3, 8 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q+16 ], m7 - lea dstq, [dstq+strideq*4] - palignr m7, m6, m4, 6 - palignr m0, m4, m3, 6 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m6, m4, 4 - palignr m0, m4, m3, 4 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m6, m4, 2 - palignr m0, m4, m3, 2 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - mova [dstq+stride3q ], m3 - mova [dstq+stride3q+16 ], m4 - lea dstq, [dstq+strideq*4] - - palignr m7, m4, m3, 14 - palignr m0, m3, m2, 14 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m4, m3, 12 - palignr m0, m3, m2, 12 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m4, m3, 10 - palignr m0, m3, m2, 10 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - palignr m7, m4, m3, 8 - palignr m0, m3, m2, 8 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q+16 ], m7 - lea dstq, [dstq+strideq*4] - palignr m7, m4, m3, 6 - palignr m0, m3, m2, 6 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m4, m3, 4 - palignr m0, m3, m2, 4 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m4, m3, 2 - palignr m0, m3, m2, 2 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - mova [dstq+stride3q ], m2 - mova [dstq+stride3q+16 ], m3 - - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset - GET_GOT goffsetq - movq m3, [leftq] ; abcdefgh [byte] - lea stride3q, [strideq*3] - - pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] - pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] - pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m3 - pavgb m0, m2 - punpcklbw m0, m3 ; interleaved output - - movq [dstq ], m0 - psrldq m0, 2 - movq [dstq+strideq ], m0 - psrldq m0, 2 - movq [dstq+strideq*2], m0 - psrldq m0, 2 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - pshufhw m0, m0, q0000 ; de, d2ef, ef, e2fg, fg, f2gh, gh, g3h, 8xh - psrldq m0, 2 - movq [dstq ], m0 - psrldq m0, 2 - movq [dstq+strideq ], m0 - psrldq m0, 2 - movq [dstq+strideq*2], m0 - psrldq m0, 2 - movq [dstq+stride3q ], m0 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d207_predictor_16x16, 4, 5, 5, dst, stride, stride3, left, goffset - GET_GOT goffsetq - lea stride3q, [strideq*3] - mova m0, [leftq] ; abcdefghijklmnop [byte] - pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] ; bcdefghijklmnopp - pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 - pavgb m1, m0 ; ab, bc, cd .. no, op, pp [byte] - - punpckhbw m4, m1, m3 ; interleaved input - punpcklbw m1, m3 ; interleaved output - mova [dstq ], m1 - palignr m3, m4, m1, 2 - mova [dstq+strideq ], m3 - palignr m3, m4, m1, 4 - mova [dstq+strideq*2], m3 - palignr m3, m4, m1, 6 - mova [dstq+stride3q ], m3 - lea dstq, [dstq+strideq*4] - palignr m3, m4, m1, 8 - mova [dstq ], m3 - palignr m3, m4, m1, 10 - mova [dstq+strideq ], m3 - palignr m3, m4, m1, 12 - mova [dstq+strideq*2], m3 - palignr m3, m4, m1, 14 - mova [dstq+stride3q ], m3 - DEFINE_ARGS dst, stride, stride3, line - mov lined, 2 - mova m0, [GLOBAL(sh_b23456789abcdefff)] -.loop: - lea dstq, [dstq+strideq*4] - mova [dstq ], m4 - pshufb m4, m0 - mova [dstq+strideq ], m4 - pshufb m4, m0 - mova [dstq+strideq*2], m4 - pshufb m4, m0 - mova [dstq+stride3q ], m4 - pshufb m4, m0 - dec lined - jnz .loop - RESTORE_GOT - REP_RET - -INIT_XMM ssse3 -cglobal d207_predictor_32x32, 4, 5, 8, dst, stride, stride3, left, goffset - GET_GOT goffsetq - lea stride3q, [strideq*3] - mova m1, [leftq] ; 0-15 [byte] - mova m2, [leftq+16] ; 16-31 [byte] - pshufb m0, m2, [GLOBAL(sh_b23456789abcdefff)] - pshufb m4, m2, [GLOBAL(sh_b123456789abcdeff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m2, m4, m0, m3 - palignr m6, m2, m1, 1 - palignr m5, m2, m1, 2 - pavgb m2, m4 ; high 16px even lines - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m6, m5, m0 - pavgb m1, m6 ; low 16px even lines - - punpckhbw m6, m1, m0 ; interleaved output 2 - punpcklbw m1, m0 ; interleaved output 1 - - punpckhbw m7, m2, m3 ; interleaved output 4 - punpcklbw m2, m3 ; interleaved output 3 - - ; output 1st 8 lines (and half of 2nd 8 lines) - DEFINE_ARGS dst, stride, stride3, dst8 - lea dst8q, [dstq+strideq*8] - mova [dstq ], m1 - mova [dstq +16], m6 - mova [dst8q ], m6 - palignr m0, m6, m1, 2 - palignr m4, m2, m6, 2 - mova [dstq +strideq ], m0 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m0, m6, m1, 4 - palignr m4, m2, m6, 4 - mova [dstq +strideq*2 ], m0 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m0, m6, m1, 6 - palignr m4, m2, m6, 6 - mova [dstq +stride3q ], m0 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq +strideq*4] - lea dst8q, [dst8q+strideq*4] - palignr m0, m6, m1, 8 - palignr m4, m2, m6, 8 - mova [dstq ], m0 - mova [dstq +16], m4 - mova [dst8q ], m4 - palignr m0, m6, m1, 10 - palignr m4, m2, m6, 10 - mova [dstq +strideq ], m0 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m0, m6, m1, 12 - palignr m4, m2, m6, 12 - mova [dstq +strideq*2 ], m0 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m0, m6, m1, 14 - palignr m4, m2, m6, 14 - mova [dstq +stride3q ], m0 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - - ; output 2nd half of 2nd 8 lines and half of 3rd 8 lines - mova [dstq +16], m2 - mova [dst8q ], m2 - palignr m4, m7, m2, 2 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m4, m7, m2, 4 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m4, m7, m2, 6 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - palignr m4, m7, m2, 8 - mova [dstq +16], m4 - mova [dst8q ], m4 - palignr m4, m7, m2, 10 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m4, m7, m2, 12 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m4, m7, m2, 14 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - - ; output 2nd half of 3rd 8 lines and half of 4th 8 lines - mova m0, [GLOBAL(sh_b23456789abcdefff)] - mova [dstq +16], m7 - mova [dst8q ], m7 - pshufb m7, m0 - mova [dstq +strideq +16], m7 - mova [dst8q+strideq ], m7 - pshufb m7, m0 - mova [dstq +strideq*2+16], m7 - mova [dst8q+strideq*2 ], m7 - pshufb m7, m0 - mova [dstq +stride3q +16], m7 - mova [dst8q+stride3q ], m7 - pshufb m7, m0 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - mova [dstq +16], m7 - mova [dst8q ], m7 - pshufb m7, m0 - mova [dstq +strideq +16], m7 - mova [dst8q+strideq ], m7 - pshufb m7, m0 - mova [dstq +strideq*2+16], m7 - mova [dst8q+strideq*2 ], m7 - pshufb m7, m0 - mova [dstq +stride3q +16], m7 - mova [dst8q+stride3q ], m7 - pshufb m7, m0 - lea dstq, [dstq+strideq*4] - - ; output last half of 4th 8 lines - mova [dstq +16], m7 - mova [dstq +strideq +16], m7 - mova [dstq +strideq*2+16], m7 - mova [dstq +stride3q +16], m7 - lea dstq, [dstq+strideq*4] - mova [dstq +16], m7 - mova [dstq +strideq +16], m7 - mova [dstq +strideq*2+16], m7 - mova [dstq +stride3q +16], m7 - - ; done! - RESTORE_GOT - RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c deleted file mode 100644 index df5068c624..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c +++ /dev/null @@ -1,4046 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/inv_txfm_sse2.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" - -#define RECON_AND_STORE4X4(dest, in_x) \ -{ \ - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - *(int *)(dest) = _mm_cvtsi128_si32(d0); \ -} - -void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i zero = _mm_setzero_si128(); - const __m128i eight = _mm_set1_epi16(8); - const __m128i cst = _mm_setr_epi16( - (int16_t)cospi_16_64, (int16_t)cospi_16_64, (int16_t)cospi_16_64, - (int16_t)-cospi_16_64, (int16_t)cospi_24_64, (int16_t)-cospi_8_64, - (int16_t)cospi_8_64, (int16_t)cospi_24_64); - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i input0, input1, input2, input3; - - // Rows - input0 = load_input_data(input); - input2 = load_input_data(input + 8); - - // Construct i3, i1, i3, i1, i2, i0, i2, i0 - input0 = _mm_shufflelo_epi16(input0, 0xd8); - input0 = _mm_shufflehi_epi16(input0, 0xd8); - input2 = _mm_shufflelo_epi16(input2, 0xd8); - input2 = _mm_shufflehi_epi16(input2, 0xd8); - - input1 = _mm_unpackhi_epi32(input0, input0); - input0 = _mm_unpacklo_epi32(input0, input0); - input3 = _mm_unpackhi_epi32(input2, input2); - input2 = _mm_unpacklo_epi32(input2, input2); - - // Stage 1 - input0 = _mm_madd_epi16(input0, cst); - input1 = _mm_madd_epi16(input1, cst); - input2 = _mm_madd_epi16(input2, cst); - input3 = _mm_madd_epi16(input3, cst); - - input0 = _mm_add_epi32(input0, rounding); - input1 = _mm_add_epi32(input1, rounding); - input2 = _mm_add_epi32(input2, rounding); - input3 = _mm_add_epi32(input3, rounding); - - input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); - input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); - input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); - input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); - - // Stage 2 - input0 = _mm_packs_epi32(input0, input1); - input1 = _mm_packs_epi32(input2, input3); - - // Transpose - input2 = _mm_unpacklo_epi16(input0, input1); - input3 = _mm_unpackhi_epi16(input0, input1); - input0 = _mm_unpacklo_epi32(input2, input3); - input1 = _mm_unpackhi_epi32(input2, input3); - - // Switch column2, column 3, and then, we got: - // input2: column1, column 0; input3: column2, column 3. - input1 = _mm_shuffle_epi32(input1, 0x4e); - input2 = _mm_add_epi16(input0, input1); - input3 = _mm_sub_epi16(input0, input1); - - // Columns - // Construct i3, i1, i3, i1, i2, i0, i2, i0 - input0 = _mm_unpacklo_epi32(input2, input2); - input1 = _mm_unpackhi_epi32(input2, input2); - input2 = _mm_unpackhi_epi32(input3, input3); - input3 = _mm_unpacklo_epi32(input3, input3); - - // Stage 1 - input0 = _mm_madd_epi16(input0, cst); - input1 = _mm_madd_epi16(input1, cst); - input2 = _mm_madd_epi16(input2, cst); - input3 = _mm_madd_epi16(input3, cst); - - input0 = _mm_add_epi32(input0, rounding); - input1 = _mm_add_epi32(input1, rounding); - input2 = _mm_add_epi32(input2, rounding); - input3 = _mm_add_epi32(input3, rounding); - - input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); - input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); - input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); - input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); - - // Stage 2 - input0 = _mm_packs_epi32(input0, input2); - input1 = _mm_packs_epi32(input1, input3); - - // Transpose - input2 = _mm_unpacklo_epi16(input0, input1); - input3 = _mm_unpackhi_epi16(input0, input1); - input0 = _mm_unpacklo_epi32(input2, input3); - input1 = _mm_unpackhi_epi32(input2, input3); - - // Switch column2, column 3, and then, we got: - // input2: column1, column 0; input3: column2, column 3. - input1 = _mm_shuffle_epi32(input1, 0x4e); - input2 = _mm_add_epi16(input0, input1); - input3 = _mm_sub_epi16(input0, input1); - - // Final round and shift - input2 = _mm_add_epi16(input2, eight); - input3 = _mm_add_epi16(input3, eight); - - input2 = _mm_srai_epi16(input2, 4); - input3 = _mm_srai_epi16(input3, 4); - - // Reconstruction and Store - { - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); - __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *)(dest + stride))); - d2 = _mm_unpacklo_epi32( - _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)), d2); - d0 = _mm_unpacklo_epi8(d0, zero); - d2 = _mm_unpacklo_epi8(d2, zero); - d0 = _mm_add_epi16(d0, input2); - d2 = _mm_add_epi16(d2, input3); - d0 = _mm_packus_epi16(d0, d2); - // store input0 - *(int *)dest = _mm_cvtsi128_si32(d0); - // store input1 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); - // store input2 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); - // store input3 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); - } -} - -void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 4); - - dc_value = _mm_set1_epi16(a); - - RECON_AND_STORE4X4(dest + 0 * stride, dc_value); - RECON_AND_STORE4X4(dest + 1 * stride, dc_value); - RECON_AND_STORE4X4(dest + 2 * stride, dc_value); - RECON_AND_STORE4X4(dest + 3 * stride, dc_value); -} - -static INLINE void transpose_4x4(__m128i *res) { - const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); - const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); - - res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1); - res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1); -} - -void idct4_sse2(__m128i *in) { - const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i u[8], v[8]; - - transpose_4x4(in); - // stage 1 - u[0] = _mm_unpacklo_epi16(in[0], in[1]); - u[1] = _mm_unpackhi_epi16(in[0], in[1]); - v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[1] = _mm_madd_epi16(u[0], k__cospi_p16_m16); - v[2] = _mm_madd_epi16(u[1], k__cospi_p24_m08); - v[3] = _mm_madd_epi16(u[1], k__cospi_p08_p24); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - - u[0] = _mm_packs_epi32(v[0], v[1]); - u[1] = _mm_packs_epi32(v[3], v[2]); - - // stage 2 - in[0] = _mm_add_epi16(u[0], u[1]); - in[1] = _mm_sub_epi16(u[0], u[1]); - in[1] = _mm_shuffle_epi32(in[1], 0x4E); -} - -void iadst4_sse2(__m128i *in) { - const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); - const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); - const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); - const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); - const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); - const __m128i kZero = _mm_set1_epi16(0); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i u[8], v[8], in7; - - transpose_4x4(in); - in7 = _mm_srli_si128(in[1], 8); - in7 = _mm_add_epi16(in7, in[0]); - in7 = _mm_sub_epi16(in7, in[1]); - - u[0] = _mm_unpacklo_epi16(in[0], in[1]); - u[1] = _mm_unpackhi_epi16(in[0], in[1]); - u[2] = _mm_unpacklo_epi16(in7, kZero); - u[3] = _mm_unpackhi_epi16(in[0], kZero); - - v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04); // s0 + s3 - v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02); // s2 + s5 - v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x2 - v[3] = _mm_madd_epi16(u[0], k__sinpi_p02_m01); // s1 - s4 - v[4] = _mm_madd_epi16(u[1], k__sinpi_p03_m04); // s2 - s6 - v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s2 - - u[0] = _mm_add_epi32(v[0], v[1]); - u[1] = _mm_add_epi32(v[3], v[4]); - u[2] = v[2]; - u[3] = _mm_add_epi32(u[0], u[1]); - u[4] = _mm_slli_epi32(v[5], 2); - u[5] = _mm_add_epi32(u[3], v[5]); - u[6] = _mm_sub_epi32(u[5], u[4]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - - in[0] = _mm_packs_epi32(u[0], u[1]); - in[1] = _mm_packs_epi32(u[2], u[3]); -} - -#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \ - const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \ - const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \ - const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \ - const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \ - const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); \ - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \ - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \ - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \ - \ - out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ - out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ - out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ - out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ - out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \ - out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \ - out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \ - out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \ - } - -#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, \ - out0, out1, out2, out3) \ - { \ - const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0); \ - const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3); \ - const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ - \ - out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ - out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ - out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ - out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ - } - -#define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - } - -// Define Macro for multiplying elements by constants and adding them together. -#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \ - cst0, cst1, cst2, cst3, res0, res1, res2, res3) \ - { \ - tmp0 = _mm_madd_epi16(lo_0, cst0); \ - tmp1 = _mm_madd_epi16(hi_0, cst0); \ - tmp2 = _mm_madd_epi16(lo_0, cst1); \ - tmp3 = _mm_madd_epi16(hi_0, cst1); \ - tmp4 = _mm_madd_epi16(lo_1, cst2); \ - tmp5 = _mm_madd_epi16(hi_1, cst2); \ - tmp6 = _mm_madd_epi16(lo_1, cst3); \ - tmp7 = _mm_madd_epi16(hi_1, cst3); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - tmp4 = _mm_add_epi32(tmp4, rounding); \ - tmp5 = _mm_add_epi32(tmp5, rounding); \ - tmp6 = _mm_add_epi32(tmp6, rounding); \ - tmp7 = _mm_add_epi32(tmp7, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \ - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \ - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \ - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \ - \ - res0 = _mm_packs_epi32(tmp0, tmp1); \ - res1 = _mm_packs_epi32(tmp2, tmp3); \ - res2 = _mm_packs_epi32(tmp4, tmp5); \ - res3 = _mm_packs_epi32(tmp6, tmp7); \ - } - -#define MULTIPLICATION_AND_ADD_2(lo_0, hi_0, cst0, cst1, res0, res1) \ - { \ - tmp0 = _mm_madd_epi16(lo_0, cst0); \ - tmp1 = _mm_madd_epi16(hi_0, cst0); \ - tmp2 = _mm_madd_epi16(lo_0, cst1); \ - tmp3 = _mm_madd_epi16(hi_0, cst1); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - res0 = _mm_packs_epi32(tmp0, tmp1); \ - res1 = _mm_packs_epi32(tmp2, tmp3); \ - } - -#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) \ - { \ - /* Stage1 */ \ - { \ - const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \ - const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \ - const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \ - const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \ - \ - MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, \ - stg1_1, stg1_2, stg1_3, stp1_4, \ - stp1_7, stp1_5, stp1_6) \ - } \ - \ - /* Stage2 */ \ - { \ - const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \ - const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \ - const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \ - const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \ - \ - MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, \ - stg2_1, stg2_2, stg2_3, stp2_0, \ - stp2_1, stp2_2, stp2_3) \ - \ - stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_56, stg2_1); \ - tmp1 = _mm_madd_epi16(hi_56, stg2_1); \ - tmp2 = _mm_madd_epi16(lo_56, stg2_0); \ - tmp3 = _mm_madd_epi16(hi_56, stg2_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - } \ - \ - /* Stage4 */ \ - out0 = _mm_adds_epi16(stp1_0, stp2_7); \ - out1 = _mm_adds_epi16(stp1_1, stp1_6); \ - out2 = _mm_adds_epi16(stp1_2, stp1_5); \ - out3 = _mm_adds_epi16(stp1_3, stp2_4); \ - out4 = _mm_subs_epi16(stp1_3, stp2_4); \ - out5 = _mm_subs_epi16(stp1_2, stp1_5); \ - out6 = _mm_subs_epi16(stp1_1, stp1_6); \ - out7 = _mm_subs_epi16(stp1_0, stp2_7); \ - } - -void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i zero = _mm_setzero_si128(); - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 4); - const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - - // Load input data. - in0 = load_input_data(input); - in1 = load_input_data(input + 8 * 1); - in2 = load_input_data(input + 8 * 2); - in3 = load_input_data(input + 8 * 3); - in4 = load_input_data(input + 8 * 4); - in5 = load_input_data(input + 8 * 5); - in6 = load_input_data(input + 8 * 6); - in7 = load_input_data(input + 8 * 7); - - // 2-D - for (i = 0; i < 2; i++) { - // 8x8 Transpose is copied from vpx_fdct8x8_sse2() - TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - - // 4-stage 1D idct8x8 - IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - } - - // Final rounding and shift - in0 = _mm_adds_epi16(in0, final_rounding); - in1 = _mm_adds_epi16(in1, final_rounding); - in2 = _mm_adds_epi16(in2, final_rounding); - in3 = _mm_adds_epi16(in3, final_rounding); - in4 = _mm_adds_epi16(in4, final_rounding); - in5 = _mm_adds_epi16(in5, final_rounding); - in6 = _mm_adds_epi16(in6, final_rounding); - in7 = _mm_adds_epi16(in7, final_rounding); - - in0 = _mm_srai_epi16(in0, 5); - in1 = _mm_srai_epi16(in1, 5); - in2 = _mm_srai_epi16(in2, 5); - in3 = _mm_srai_epi16(in3, 5); - in4 = _mm_srai_epi16(in4, 5); - in5 = _mm_srai_epi16(in5, 5); - in6 = _mm_srai_epi16(in6, 5); - in7 = _mm_srai_epi16(in7, 5); - - RECON_AND_STORE(dest + 0 * stride, in0); - RECON_AND_STORE(dest + 1 * stride, in1); - RECON_AND_STORE(dest + 2 * stride, in2); - RECON_AND_STORE(dest + 3 * stride, in3); - RECON_AND_STORE(dest + 4 * stride, in4); - RECON_AND_STORE(dest + 5 * stride, in5); - RECON_AND_STORE(dest + 6 * stride, in6); - RECON_AND_STORE(dest + 7 * stride, in7); -} - -void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 5); - - dc_value = _mm_set1_epi16(a); - - RECON_AND_STORE(dest + 0 * stride, dc_value); - RECON_AND_STORE(dest + 1 * stride, dc_value); - RECON_AND_STORE(dest + 2 * stride, dc_value); - RECON_AND_STORE(dest + 3 * stride, dc_value); - RECON_AND_STORE(dest + 4 * stride, dc_value); - RECON_AND_STORE(dest + 5 * stride, dc_value); - RECON_AND_STORE(dest + 6 * stride, dc_value); - RECON_AND_STORE(dest + 7 * stride, dc_value); -} - -void idct8_sse2(__m128i *in) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - - // 8x8 Transpose is copied from vpx_fdct8x8_sse2() - TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], - in0, in1, in2, in3, in4, in5, in6, in7); - - // 4-stage 1D idct8x8 - IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); -} - -void iadst8_sse2(__m128i *in) { - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__const_0 = _mm_set1_epi16(0); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - - __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; - __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - - // transpose - array_transpose_8x8(in, in); - - // properly aligned for butterfly input - in0 = in[7]; - in1 = in[0]; - in2 = in[5]; - in3 = in[2]; - in4 = in[3]; - in5 = in[4]; - in6 = in[1]; - in7 = in[6]; - - // column transformation - // stage 1 - // interleave and multiply/add into 32-bit integer - s0 = _mm_unpacklo_epi16(in0, in1); - s1 = _mm_unpackhi_epi16(in0, in1); - s2 = _mm_unpacklo_epi16(in2, in3); - s3 = _mm_unpackhi_epi16(in2, in3); - s4 = _mm_unpacklo_epi16(in4, in5); - s5 = _mm_unpackhi_epi16(in4, in5); - s6 = _mm_unpacklo_epi16(in6, in7); - s7 = _mm_unpackhi_epi16(in6, in7); - - u0 = _mm_madd_epi16(s0, k__cospi_p02_p30); - u1 = _mm_madd_epi16(s1, k__cospi_p02_p30); - u2 = _mm_madd_epi16(s0, k__cospi_p30_m02); - u3 = _mm_madd_epi16(s1, k__cospi_p30_m02); - u4 = _mm_madd_epi16(s2, k__cospi_p10_p22); - u5 = _mm_madd_epi16(s3, k__cospi_p10_p22); - u6 = _mm_madd_epi16(s2, k__cospi_p22_m10); - u7 = _mm_madd_epi16(s3, k__cospi_p22_m10); - u8 = _mm_madd_epi16(s4, k__cospi_p18_p14); - u9 = _mm_madd_epi16(s5, k__cospi_p18_p14); - u10 = _mm_madd_epi16(s4, k__cospi_p14_m18); - u11 = _mm_madd_epi16(s5, k__cospi_p14_m18); - u12 = _mm_madd_epi16(s6, k__cospi_p26_p06); - u13 = _mm_madd_epi16(s7, k__cospi_p26_p06); - u14 = _mm_madd_epi16(s6, k__cospi_p06_m26); - u15 = _mm_madd_epi16(s7, k__cospi_p06_m26); - - // addition - w0 = _mm_add_epi32(u0, u8); - w1 = _mm_add_epi32(u1, u9); - w2 = _mm_add_epi32(u2, u10); - w3 = _mm_add_epi32(u3, u11); - w4 = _mm_add_epi32(u4, u12); - w5 = _mm_add_epi32(u5, u13); - w6 = _mm_add_epi32(u6, u14); - w7 = _mm_add_epi32(u7, u15); - w8 = _mm_sub_epi32(u0, u8); - w9 = _mm_sub_epi32(u1, u9); - w10 = _mm_sub_epi32(u2, u10); - w11 = _mm_sub_epi32(u3, u11); - w12 = _mm_sub_epi32(u4, u12); - w13 = _mm_sub_epi32(u5, u13); - w14 = _mm_sub_epi32(u6, u14); - w15 = _mm_sub_epi32(u7, u15); - - // shift and rounding - v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING); - v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING); - v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING); - v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING); - v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING); - v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING); - v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING); - v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - u8 = _mm_srai_epi32(v8, DCT_CONST_BITS); - u9 = _mm_srai_epi32(v9, DCT_CONST_BITS); - u10 = _mm_srai_epi32(v10, DCT_CONST_BITS); - u11 = _mm_srai_epi32(v11, DCT_CONST_BITS); - u12 = _mm_srai_epi32(v12, DCT_CONST_BITS); - u13 = _mm_srai_epi32(v13, DCT_CONST_BITS); - u14 = _mm_srai_epi32(v14, DCT_CONST_BITS); - u15 = _mm_srai_epi32(v15, DCT_CONST_BITS); - - // back to 16-bit and pack 8 integers into __m128i - in[0] = _mm_packs_epi32(u0, u1); - in[1] = _mm_packs_epi32(u2, u3); - in[2] = _mm_packs_epi32(u4, u5); - in[3] = _mm_packs_epi32(u6, u7); - in[4] = _mm_packs_epi32(u8, u9); - in[5] = _mm_packs_epi32(u10, u11); - in[6] = _mm_packs_epi32(u12, u13); - in[7] = _mm_packs_epi32(u14, u15); - - // stage 2 - s0 = _mm_add_epi16(in[0], in[2]); - s1 = _mm_add_epi16(in[1], in[3]); - s2 = _mm_sub_epi16(in[0], in[2]); - s3 = _mm_sub_epi16(in[1], in[3]); - u0 = _mm_unpacklo_epi16(in[4], in[5]); - u1 = _mm_unpackhi_epi16(in[4], in[5]); - u2 = _mm_unpacklo_epi16(in[6], in[7]); - u3 = _mm_unpackhi_epi16(in[6], in[7]); - - v0 = _mm_madd_epi16(u0, k__cospi_p08_p24); - v1 = _mm_madd_epi16(u1, k__cospi_p08_p24); - v2 = _mm_madd_epi16(u0, k__cospi_p24_m08); - v3 = _mm_madd_epi16(u1, k__cospi_p24_m08); - v4 = _mm_madd_epi16(u2, k__cospi_m24_p08); - v5 = _mm_madd_epi16(u3, k__cospi_m24_p08); - v6 = _mm_madd_epi16(u2, k__cospi_p08_p24); - v7 = _mm_madd_epi16(u3, k__cospi_p08_p24); - - w0 = _mm_add_epi32(v0, v4); - w1 = _mm_add_epi32(v1, v5); - w2 = _mm_add_epi32(v2, v6); - w3 = _mm_add_epi32(v3, v7); - w4 = _mm_sub_epi32(v0, v4); - w5 = _mm_sub_epi32(v1, v5); - w6 = _mm_sub_epi32(v2, v6); - w7 = _mm_sub_epi32(v3, v7); - - v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - - // back to 16-bit intergers - s4 = _mm_packs_epi32(u0, u1); - s5 = _mm_packs_epi32(u2, u3); - s6 = _mm_packs_epi32(u4, u5); - s7 = _mm_packs_epi32(u6, u7); - - // stage 3 - u0 = _mm_unpacklo_epi16(s2, s3); - u1 = _mm_unpackhi_epi16(s2, s3); - u2 = _mm_unpacklo_epi16(s6, s7); - u3 = _mm_unpackhi_epi16(s6, s7); - - v0 = _mm_madd_epi16(u0, k__cospi_p16_p16); - v1 = _mm_madd_epi16(u1, k__cospi_p16_p16); - v2 = _mm_madd_epi16(u0, k__cospi_p16_m16); - v3 = _mm_madd_epi16(u1, k__cospi_p16_m16); - v4 = _mm_madd_epi16(u2, k__cospi_p16_p16); - v5 = _mm_madd_epi16(u3, k__cospi_p16_p16); - v6 = _mm_madd_epi16(u2, k__cospi_p16_m16); - v7 = _mm_madd_epi16(u3, k__cospi_p16_m16); - - u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING); - u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING); - u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING); - u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING); - u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING); - u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING); - u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING); - u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING); - - v0 = _mm_srai_epi32(u0, DCT_CONST_BITS); - v1 = _mm_srai_epi32(u1, DCT_CONST_BITS); - v2 = _mm_srai_epi32(u2, DCT_CONST_BITS); - v3 = _mm_srai_epi32(u3, DCT_CONST_BITS); - v4 = _mm_srai_epi32(u4, DCT_CONST_BITS); - v5 = _mm_srai_epi32(u5, DCT_CONST_BITS); - v6 = _mm_srai_epi32(u6, DCT_CONST_BITS); - v7 = _mm_srai_epi32(u7, DCT_CONST_BITS); - - s2 = _mm_packs_epi32(v0, v1); - s3 = _mm_packs_epi32(v2, v3); - s6 = _mm_packs_epi32(v4, v5); - s7 = _mm_packs_epi32(v6, v7); - - in[0] = s0; - in[1] = _mm_sub_epi16(k__const_0, s4); - in[2] = s6; - in[3] = _mm_sub_epi16(k__const_0, s2); - in[4] = s3; - in[5] = _mm_sub_epi16(k__const_0, s7); - in[6] = s5; - in[7] = _mm_sub_epi16(k__const_0, s1); -} - -void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i zero = _mm_setzero_si128(); - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 4); - const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i stg3_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - - // Rows. Load 4-row input data. - in0 = load_input_data(input); - in1 = load_input_data(input + 8 * 1); - in2 = load_input_data(input + 8 * 2); - in3 = load_input_data(input + 8 * 3); - - // 8x4 Transpose - TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1); - // Stage1 - { - const __m128i lo_17 = _mm_unpackhi_epi16(in0, zero); - const __m128i lo_35 = _mm_unpackhi_epi16(in1, zero); - - tmp0 = _mm_madd_epi16(lo_17, stg1_0); - tmp2 = _mm_madd_epi16(lo_17, stg1_1); - tmp4 = _mm_madd_epi16(lo_35, stg1_2); - tmp6 = _mm_madd_epi16(lo_35, stg1_3); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp1_4 = _mm_packs_epi32(tmp0, tmp2); - stp1_5 = _mm_packs_epi32(tmp4, tmp6); - } - - // Stage2 - { - const __m128i lo_04 = _mm_unpacklo_epi16(in0, zero); - const __m128i lo_26 = _mm_unpacklo_epi16(in1, zero); - - tmp0 = _mm_madd_epi16(lo_04, stg2_0); - tmp2 = _mm_madd_epi16(lo_04, stg2_1); - tmp4 = _mm_madd_epi16(lo_26, stg2_2); - tmp6 = _mm_madd_epi16(lo_26, stg2_3); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp2_0 = _mm_packs_epi32(tmp0, tmp2); - stp2_2 = _mm_packs_epi32(tmp6, tmp4); - - tmp0 = _mm_adds_epi16(stp1_4, stp1_5); - tmp1 = _mm_subs_epi16(stp1_4, stp1_5); - - stp2_4 = tmp0; - stp2_5 = _mm_unpacklo_epi64(tmp1, zero); - stp2_6 = _mm_unpackhi_epi64(tmp1, zero); - } - - // Stage3 - { - const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6); - - tmp4 = _mm_adds_epi16(stp2_0, stp2_2); - tmp6 = _mm_subs_epi16(stp2_0, stp2_2); - - stp1_2 = _mm_unpackhi_epi64(tmp6, tmp4); - stp1_3 = _mm_unpacklo_epi64(tmp6, tmp4); - - tmp0 = _mm_madd_epi16(lo_56, stg3_0); - tmp2 = _mm_madd_epi16(lo_56, stg2_0); // stg3_1 = stg2_0 - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - - stp1_5 = _mm_packs_epi32(tmp0, tmp2); - } - - // Stage4 - tmp0 = _mm_adds_epi16(stp1_3, stp2_4); - tmp1 = _mm_adds_epi16(stp1_2, stp1_5); - tmp2 = _mm_subs_epi16(stp1_3, stp2_4); - tmp3 = _mm_subs_epi16(stp1_2, stp1_5); - - TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3) - - IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, - in0, in1, in2, in3, in4, in5, in6, in7); - // Final rounding and shift - in0 = _mm_adds_epi16(in0, final_rounding); - in1 = _mm_adds_epi16(in1, final_rounding); - in2 = _mm_adds_epi16(in2, final_rounding); - in3 = _mm_adds_epi16(in3, final_rounding); - in4 = _mm_adds_epi16(in4, final_rounding); - in5 = _mm_adds_epi16(in5, final_rounding); - in6 = _mm_adds_epi16(in6, final_rounding); - in7 = _mm_adds_epi16(in7, final_rounding); - - in0 = _mm_srai_epi16(in0, 5); - in1 = _mm_srai_epi16(in1, 5); - in2 = _mm_srai_epi16(in2, 5); - in3 = _mm_srai_epi16(in3, 5); - in4 = _mm_srai_epi16(in4, 5); - in5 = _mm_srai_epi16(in5, 5); - in6 = _mm_srai_epi16(in6, 5); - in7 = _mm_srai_epi16(in7, 5); - - RECON_AND_STORE(dest + 0 * stride, in0); - RECON_AND_STORE(dest + 1 * stride, in1); - RECON_AND_STORE(dest + 2 * stride, in2); - RECON_AND_STORE(dest + 3 * stride, in3); - RECON_AND_STORE(dest + 4 * stride, in4); - RECON_AND_STORE(dest + 5 * stride, in5); - RECON_AND_STORE(dest + 6 * stride, in6); - RECON_AND_STORE(dest + 7 * stride, in7); -} - -#define IDCT16 \ - /* Stage2 */ \ - { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \ - const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]); \ - const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \ - const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \ - const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \ - stg2_0, stg2_1, stg2_2, stg2_3, \ - stp2_8, stp2_15, stp2_9, stp2_14) \ - \ - MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, \ - stg2_4, stg2_5, stg2_6, stg2_7, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \ - const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \ - const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \ - \ - MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \ - stg3_0, stg3_1, stg3_2, stg3_3, \ - stp1_4, stp1_7, stp1_5, stp1_6) \ - \ - stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \ - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ - \ - stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \ - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ - } \ - \ - /* Stage4 */ \ - { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \ - const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \ - const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, \ - stg4_0, stg4_1, stg4_2, stg4_3, \ - stp2_0, stp2_1, stp2_2, stp2_3) \ - \ - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ - stg4_4, stg4_5, stg4_6, stg4_7, \ - stp2_9, stp2_14, stp2_10, stp2_13) \ - } \ - \ - /* Stage5 */ \ - { \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ - \ - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ - } \ - \ - /* Stage6 */ \ - { \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } - -#define IDCT16_10 \ - /* Stage2 */ \ - { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, \ - stg2_0, stg2_1, stg2_6, stg2_7, \ - stp1_8_0, stp1_15, stp1_11, stp1_12_0) \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \ - \ - MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, \ - stg3_0, stg3_1, \ - stp2_4, stp2_7) \ - \ - stp1_9 = stp1_8_0; \ - stp1_10 = stp1_11; \ - \ - stp1_13 = stp1_12_0; \ - stp1_14 = stp1_15; \ - } \ - \ - /* Stage4 */ \ - { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, \ - stg4_0, stg4_1, \ - stp1_0, stp1_1) \ - stp2_5 = stp2_4; \ - stp2_6 = stp2_7; \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ - stg4_4, stg4_5, stg4_6, stg4_7, \ - stp2_9, stp2_14, stp2_10, stp2_13) \ - } \ - \ - /* Stage5 */ \ - { \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_2 = stp1_1; \ - stp1_3 = stp1_0; \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ - \ - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ - } \ - \ - /* Stage6 */ \ - { \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } - -void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 5); - const __m128i zero = _mm_setzero_si128(); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in[16], l[16], r[16], *curr1; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - - curr1 = l; - for (i = 0; i < 2; i++) { - // 1-D idct - - // Load input data. - in[0] = load_input_data(input); - in[8] = load_input_data(input + 8 * 1); - in[1] = load_input_data(input + 8 * 2); - in[9] = load_input_data(input + 8 * 3); - in[2] = load_input_data(input + 8 * 4); - in[10] = load_input_data(input + 8 * 5); - in[3] = load_input_data(input + 8 * 6); - in[11] = load_input_data(input + 8 * 7); - in[4] = load_input_data(input + 8 * 8); - in[12] = load_input_data(input + 8 * 9); - in[5] = load_input_data(input + 8 * 10); - in[13] = load_input_data(input + 8 * 11); - in[6] = load_input_data(input + 8 * 12); - in[14] = load_input_data(input + 8 * 13); - in[7] = load_input_data(input + 8 * 14); - in[15] = load_input_data(input + 8 * 15); - - array_transpose_8x8(in, in); - array_transpose_8x8(in + 8, in + 8); - - IDCT16 - - // Stage7 - curr1[0] = _mm_add_epi16(stp2_0, stp1_15); - curr1[1] = _mm_add_epi16(stp2_1, stp1_14); - curr1[2] = _mm_add_epi16(stp2_2, stp2_13); - curr1[3] = _mm_add_epi16(stp2_3, stp2_12); - curr1[4] = _mm_add_epi16(stp2_4, stp2_11); - curr1[5] = _mm_add_epi16(stp2_5, stp2_10); - curr1[6] = _mm_add_epi16(stp2_6, stp1_9); - curr1[7] = _mm_add_epi16(stp2_7, stp1_8); - curr1[8] = _mm_sub_epi16(stp2_7, stp1_8); - curr1[9] = _mm_sub_epi16(stp2_6, stp1_9); - curr1[10] = _mm_sub_epi16(stp2_5, stp2_10); - curr1[11] = _mm_sub_epi16(stp2_4, stp2_11); - curr1[12] = _mm_sub_epi16(stp2_3, stp2_12); - curr1[13] = _mm_sub_epi16(stp2_2, stp2_13); - curr1[14] = _mm_sub_epi16(stp2_1, stp1_14); - curr1[15] = _mm_sub_epi16(stp2_0, stp1_15); - - curr1 = r; - input += 128; - } - for (i = 0; i < 2; i++) { - int j; - // 1-D idct - array_transpose_8x8(l + i * 8, in); - array_transpose_8x8(r + i * 8, in + 8); - - IDCT16 - - // 2-D - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = _mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); - - for (j = 0; j < 16; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a, i; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 6); - - dc_value = _mm_set1_epi16(a); - - for (i = 0; i < 16; ++i) { - RECON_AND_STORE(dest + 0, dc_value); - RECON_AND_STORE(dest + 8, dc_value); - dest += stride; - } -} - -static void iadst16_8col(__m128i *in) { - // perform 16x16 1-D ADST for 8 columns - __m128i s[16], x[16], u[32], v[32]; - const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); - const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); - const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); - const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); - const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64); - const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64); - const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64); - const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64); - const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64); - const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64); - const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64); - const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64); - const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64); - const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); - const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i kZero = _mm_set1_epi16(0); - - u[0] = _mm_unpacklo_epi16(in[15], in[0]); - u[1] = _mm_unpackhi_epi16(in[15], in[0]); - u[2] = _mm_unpacklo_epi16(in[13], in[2]); - u[3] = _mm_unpackhi_epi16(in[13], in[2]); - u[4] = _mm_unpacklo_epi16(in[11], in[4]); - u[5] = _mm_unpackhi_epi16(in[11], in[4]); - u[6] = _mm_unpacklo_epi16(in[9], in[6]); - u[7] = _mm_unpackhi_epi16(in[9], in[6]); - u[8] = _mm_unpacklo_epi16(in[7], in[8]); - u[9] = _mm_unpackhi_epi16(in[7], in[8]); - u[10] = _mm_unpacklo_epi16(in[5], in[10]); - u[11] = _mm_unpackhi_epi16(in[5], in[10]); - u[12] = _mm_unpacklo_epi16(in[3], in[12]); - u[13] = _mm_unpackhi_epi16(in[3], in[12]); - u[14] = _mm_unpacklo_epi16(in[1], in[14]); - u[15] = _mm_unpackhi_epi16(in[1], in[14]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31); - v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31); - v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01); - v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01); - v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27); - v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27); - v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05); - v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05); - v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23); - v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23); - v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09); - v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09); - v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19); - v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19); - v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13); - v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13); - v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15); - v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15); - v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17); - v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17); - v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11); - v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11); - v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21); - v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21); - v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07); - v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07); - v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25); - v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25); - v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03); - v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03); - v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29); - v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29); - - u[0] = _mm_add_epi32(v[0], v[16]); - u[1] = _mm_add_epi32(v[1], v[17]); - u[2] = _mm_add_epi32(v[2], v[18]); - u[3] = _mm_add_epi32(v[3], v[19]); - u[4] = _mm_add_epi32(v[4], v[20]); - u[5] = _mm_add_epi32(v[5], v[21]); - u[6] = _mm_add_epi32(v[6], v[22]); - u[7] = _mm_add_epi32(v[7], v[23]); - u[8] = _mm_add_epi32(v[8], v[24]); - u[9] = _mm_add_epi32(v[9], v[25]); - u[10] = _mm_add_epi32(v[10], v[26]); - u[11] = _mm_add_epi32(v[11], v[27]); - u[12] = _mm_add_epi32(v[12], v[28]); - u[13] = _mm_add_epi32(v[13], v[29]); - u[14] = _mm_add_epi32(v[14], v[30]); - u[15] = _mm_add_epi32(v[15], v[31]); - u[16] = _mm_sub_epi32(v[0], v[16]); - u[17] = _mm_sub_epi32(v[1], v[17]); - u[18] = _mm_sub_epi32(v[2], v[18]); - u[19] = _mm_sub_epi32(v[3], v[19]); - u[20] = _mm_sub_epi32(v[4], v[20]); - u[21] = _mm_sub_epi32(v[5], v[21]); - u[22] = _mm_sub_epi32(v[6], v[22]); - u[23] = _mm_sub_epi32(v[7], v[23]); - u[24] = _mm_sub_epi32(v[8], v[24]); - u[25] = _mm_sub_epi32(v[9], v[25]); - u[26] = _mm_sub_epi32(v[10], v[26]); - u[27] = _mm_sub_epi32(v[11], v[27]); - u[28] = _mm_sub_epi32(v[12], v[28]); - u[29] = _mm_sub_epi32(v[13], v[29]); - u[30] = _mm_sub_epi32(v[14], v[30]); - u[31] = _mm_sub_epi32(v[15], v[31]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING); - v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING); - v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING); - v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING); - v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING); - v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING); - v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING); - v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING); - v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING); - v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING); - v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING); - v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING); - v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING); - v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING); - v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING); - v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS); - u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS); - u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS); - u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS); - u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS); - u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS); - u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS); - u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS); - u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS); - u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS); - u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS); - u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS); - u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS); - u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS); - u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS); - u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS); - - s[0] = _mm_packs_epi32(u[0], u[1]); - s[1] = _mm_packs_epi32(u[2], u[3]); - s[2] = _mm_packs_epi32(u[4], u[5]); - s[3] = _mm_packs_epi32(u[6], u[7]); - s[4] = _mm_packs_epi32(u[8], u[9]); - s[5] = _mm_packs_epi32(u[10], u[11]); - s[6] = _mm_packs_epi32(u[12], u[13]); - s[7] = _mm_packs_epi32(u[14], u[15]); - s[8] = _mm_packs_epi32(u[16], u[17]); - s[9] = _mm_packs_epi32(u[18], u[19]); - s[10] = _mm_packs_epi32(u[20], u[21]); - s[11] = _mm_packs_epi32(u[22], u[23]); - s[12] = _mm_packs_epi32(u[24], u[25]); - s[13] = _mm_packs_epi32(u[26], u[27]); - s[14] = _mm_packs_epi32(u[28], u[29]); - s[15] = _mm_packs_epi32(u[30], u[31]); - - // stage 2 - u[0] = _mm_unpacklo_epi16(s[8], s[9]); - u[1] = _mm_unpackhi_epi16(s[8], s[9]); - u[2] = _mm_unpacklo_epi16(s[10], s[11]); - u[3] = _mm_unpackhi_epi16(s[10], s[11]); - u[4] = _mm_unpacklo_epi16(s[12], s[13]); - u[5] = _mm_unpackhi_epi16(s[12], s[13]); - u[6] = _mm_unpacklo_epi16(s[14], s[15]); - u[7] = _mm_unpackhi_epi16(s[14], s[15]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28); - v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28); - v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04); - v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04); - v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12); - v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12); - v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20); - v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20); - v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04); - v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04); - v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28); - v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28); - v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20); - v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20); - v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12); - v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12); - - u[0] = _mm_add_epi32(v[0], v[8]); - u[1] = _mm_add_epi32(v[1], v[9]); - u[2] = _mm_add_epi32(v[2], v[10]); - u[3] = _mm_add_epi32(v[3], v[11]); - u[4] = _mm_add_epi32(v[4], v[12]); - u[5] = _mm_add_epi32(v[5], v[13]); - u[6] = _mm_add_epi32(v[6], v[14]); - u[7] = _mm_add_epi32(v[7], v[15]); - u[8] = _mm_sub_epi32(v[0], v[8]); - u[9] = _mm_sub_epi32(v[1], v[9]); - u[10] = _mm_sub_epi32(v[2], v[10]); - u[11] = _mm_sub_epi32(v[3], v[11]); - u[12] = _mm_sub_epi32(v[4], v[12]); - u[13] = _mm_sub_epi32(v[5], v[13]); - u[14] = _mm_sub_epi32(v[6], v[14]); - u[15] = _mm_sub_epi32(v[7], v[15]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - - x[0] = _mm_add_epi16(s[0], s[4]); - x[1] = _mm_add_epi16(s[1], s[5]); - x[2] = _mm_add_epi16(s[2], s[6]); - x[3] = _mm_add_epi16(s[3], s[7]); - x[4] = _mm_sub_epi16(s[0], s[4]); - x[5] = _mm_sub_epi16(s[1], s[5]); - x[6] = _mm_sub_epi16(s[2], s[6]); - x[7] = _mm_sub_epi16(s[3], s[7]); - x[8] = _mm_packs_epi32(u[0], u[1]); - x[9] = _mm_packs_epi32(u[2], u[3]); - x[10] = _mm_packs_epi32(u[4], u[5]); - x[11] = _mm_packs_epi32(u[6], u[7]); - x[12] = _mm_packs_epi32(u[8], u[9]); - x[13] = _mm_packs_epi32(u[10], u[11]); - x[14] = _mm_packs_epi32(u[12], u[13]); - x[15] = _mm_packs_epi32(u[14], u[15]); - - // stage 3 - u[0] = _mm_unpacklo_epi16(x[4], x[5]); - u[1] = _mm_unpackhi_epi16(x[4], x[5]); - u[2] = _mm_unpacklo_epi16(x[6], x[7]); - u[3] = _mm_unpackhi_epi16(x[6], x[7]); - u[4] = _mm_unpacklo_epi16(x[12], x[13]); - u[5] = _mm_unpackhi_epi16(x[12], x[13]); - u[6] = _mm_unpacklo_epi16(x[14], x[15]); - u[7] = _mm_unpackhi_epi16(x[14], x[15]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); - v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); - v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); - v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); - v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); - v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08); - v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); - v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24); - v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24); - v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08); - v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08); - v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08); - v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08); - v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24); - v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24); - - u[0] = _mm_add_epi32(v[0], v[4]); - u[1] = _mm_add_epi32(v[1], v[5]); - u[2] = _mm_add_epi32(v[2], v[6]); - u[3] = _mm_add_epi32(v[3], v[7]); - u[4] = _mm_sub_epi32(v[0], v[4]); - u[5] = _mm_sub_epi32(v[1], v[5]); - u[6] = _mm_sub_epi32(v[2], v[6]); - u[7] = _mm_sub_epi32(v[3], v[7]); - u[8] = _mm_add_epi32(v[8], v[12]); - u[9] = _mm_add_epi32(v[9], v[13]); - u[10] = _mm_add_epi32(v[10], v[14]); - u[11] = _mm_add_epi32(v[11], v[15]); - u[12] = _mm_sub_epi32(v[8], v[12]); - u[13] = _mm_sub_epi32(v[9], v[13]); - u[14] = _mm_sub_epi32(v[10], v[14]); - u[15] = _mm_sub_epi32(v[11], v[15]); - - u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[0] = _mm_add_epi16(x[0], x[2]); - s[1] = _mm_add_epi16(x[1], x[3]); - s[2] = _mm_sub_epi16(x[0], x[2]); - s[3] = _mm_sub_epi16(x[1], x[3]); - s[4] = _mm_packs_epi32(v[0], v[1]); - s[5] = _mm_packs_epi32(v[2], v[3]); - s[6] = _mm_packs_epi32(v[4], v[5]); - s[7] = _mm_packs_epi32(v[6], v[7]); - s[8] = _mm_add_epi16(x[8], x[10]); - s[9] = _mm_add_epi16(x[9], x[11]); - s[10] = _mm_sub_epi16(x[8], x[10]); - s[11] = _mm_sub_epi16(x[9], x[11]); - s[12] = _mm_packs_epi32(v[8], v[9]); - s[13] = _mm_packs_epi32(v[10], v[11]); - s[14] = _mm_packs_epi32(v[12], v[13]); - s[15] = _mm_packs_epi32(v[14], v[15]); - - // stage 4 - u[0] = _mm_unpacklo_epi16(s[2], s[3]); - u[1] = _mm_unpackhi_epi16(s[2], s[3]); - u[2] = _mm_unpacklo_epi16(s[6], s[7]); - u[3] = _mm_unpackhi_epi16(s[6], s[7]); - u[4] = _mm_unpacklo_epi16(s[10], s[11]); - u[5] = _mm_unpackhi_epi16(s[10], s[11]); - u[6] = _mm_unpacklo_epi16(s[14], s[15]); - u[7] = _mm_unpackhi_epi16(s[14], s[15]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); - v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16); - v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16); - v[6] = _mm_madd_epi16(u[2], k__cospi_m16_p16); - v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16); - v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16); - v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16); - v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16); - v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16); - v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16); - v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16); - v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16); - v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - in[0] = s[0]; - in[1] = _mm_sub_epi16(kZero, s[8]); - in[2] = s[12]; - in[3] = _mm_sub_epi16(kZero, s[4]); - in[4] = _mm_packs_epi32(v[4], v[5]); - in[5] = _mm_packs_epi32(v[12], v[13]); - in[6] = _mm_packs_epi32(v[8], v[9]); - in[7] = _mm_packs_epi32(v[0], v[1]); - in[8] = _mm_packs_epi32(v[2], v[3]); - in[9] = _mm_packs_epi32(v[10], v[11]); - in[10] = _mm_packs_epi32(v[14], v[15]); - in[11] = _mm_packs_epi32(v[6], v[7]); - in[12] = s[5]; - in[13] = _mm_sub_epi16(kZero, s[13]); - in[14] = s[9]; - in[15] = _mm_sub_epi16(kZero, s[1]); -} - -static void idct16_8col(__m128i *in) { - const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i v[16], u[16], s[16], t[16]; - - // stage 1 - s[0] = in[0]; - s[1] = in[8]; - s[2] = in[4]; - s[3] = in[12]; - s[4] = in[2]; - s[5] = in[10]; - s[6] = in[6]; - s[7] = in[14]; - s[8] = in[1]; - s[9] = in[9]; - s[10] = in[5]; - s[11] = in[13]; - s[12] = in[3]; - s[13] = in[11]; - s[14] = in[7]; - s[15] = in[15]; - - // stage 2 - u[0] = _mm_unpacklo_epi16(s[8], s[15]); - u[1] = _mm_unpackhi_epi16(s[8], s[15]); - u[2] = _mm_unpacklo_epi16(s[9], s[14]); - u[3] = _mm_unpackhi_epi16(s[9], s[14]); - u[4] = _mm_unpacklo_epi16(s[10], s[13]); - u[5] = _mm_unpackhi_epi16(s[10], s[13]); - u[6] = _mm_unpacklo_epi16(s[11], s[12]); - u[7] = _mm_unpackhi_epi16(s[11], s[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02); - v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02); - v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30); - v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30); - v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18); - v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18); - v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14); - v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14); - v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10); - v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10); - v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22); - v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22); - v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26); - v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26); - v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06); - v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[8] = _mm_packs_epi32(u[0], u[1]); - s[15] = _mm_packs_epi32(u[2], u[3]); - s[9] = _mm_packs_epi32(u[4], u[5]); - s[14] = _mm_packs_epi32(u[6], u[7]); - s[10] = _mm_packs_epi32(u[8], u[9]); - s[13] = _mm_packs_epi32(u[10], u[11]); - s[11] = _mm_packs_epi32(u[12], u[13]); - s[12] = _mm_packs_epi32(u[14], u[15]); - - // stage 3 - t[0] = s[0]; - t[1] = s[1]; - t[2] = s[2]; - t[3] = s[3]; - u[0] = _mm_unpacklo_epi16(s[4], s[7]); - u[1] = _mm_unpackhi_epi16(s[4], s[7]); - u[2] = _mm_unpacklo_epi16(s[5], s[6]); - u[3] = _mm_unpackhi_epi16(s[5], s[6]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04); - v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04); - v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28); - v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28); - v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20); - v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20); - v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12); - v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - t[4] = _mm_packs_epi32(u[0], u[1]); - t[7] = _mm_packs_epi32(u[2], u[3]); - t[5] = _mm_packs_epi32(u[4], u[5]); - t[6] = _mm_packs_epi32(u[6], u[7]); - t[8] = _mm_add_epi16(s[8], s[9]); - t[9] = _mm_sub_epi16(s[8], s[9]); - t[10] = _mm_sub_epi16(s[11], s[10]); - t[11] = _mm_add_epi16(s[10], s[11]); - t[12] = _mm_add_epi16(s[12], s[13]); - t[13] = _mm_sub_epi16(s[12], s[13]); - t[14] = _mm_sub_epi16(s[15], s[14]); - t[15] = _mm_add_epi16(s[14], s[15]); - - // stage 4 - u[0] = _mm_unpacklo_epi16(t[0], t[1]); - u[1] = _mm_unpackhi_epi16(t[0], t[1]); - u[2] = _mm_unpacklo_epi16(t[2], t[3]); - u[3] = _mm_unpackhi_epi16(t[2], t[3]); - u[4] = _mm_unpacklo_epi16(t[9], t[14]); - u[5] = _mm_unpackhi_epi16(t[9], t[14]); - u[6] = _mm_unpacklo_epi16(t[10], t[13]); - u[7] = _mm_unpackhi_epi16(t[10], t[13]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); - v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08); - v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08); - v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); - v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24); - v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24); - v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08); - v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08); - v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08); - v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08); - v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24); - v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[0] = _mm_packs_epi32(u[0], u[1]); - s[1] = _mm_packs_epi32(u[2], u[3]); - s[2] = _mm_packs_epi32(u[4], u[5]); - s[3] = _mm_packs_epi32(u[6], u[7]); - s[4] = _mm_add_epi16(t[4], t[5]); - s[5] = _mm_sub_epi16(t[4], t[5]); - s[6] = _mm_sub_epi16(t[7], t[6]); - s[7] = _mm_add_epi16(t[6], t[7]); - s[8] = t[8]; - s[15] = t[15]; - s[9] = _mm_packs_epi32(u[8], u[9]); - s[14] = _mm_packs_epi32(u[10], u[11]); - s[10] = _mm_packs_epi32(u[12], u[13]); - s[13] = _mm_packs_epi32(u[14], u[15]); - s[11] = t[11]; - s[12] = t[12]; - - // stage 5 - t[0] = _mm_add_epi16(s[0], s[3]); - t[1] = _mm_add_epi16(s[1], s[2]); - t[2] = _mm_sub_epi16(s[1], s[2]); - t[3] = _mm_sub_epi16(s[0], s[3]); - t[4] = s[4]; - t[7] = s[7]; - - u[0] = _mm_unpacklo_epi16(s[5], s[6]); - u[1] = _mm_unpackhi_epi16(s[5], s[6]); - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - t[5] = _mm_packs_epi32(u[0], u[1]); - t[6] = _mm_packs_epi32(u[2], u[3]); - - t[8] = _mm_add_epi16(s[8], s[11]); - t[9] = _mm_add_epi16(s[9], s[10]); - t[10] = _mm_sub_epi16(s[9], s[10]); - t[11] = _mm_sub_epi16(s[8], s[11]); - t[12] = _mm_sub_epi16(s[15], s[12]); - t[13] = _mm_sub_epi16(s[14], s[13]); - t[14] = _mm_add_epi16(s[13], s[14]); - t[15] = _mm_add_epi16(s[12], s[15]); - - // stage 6 - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_unpacklo_epi16(t[10], t[13]); - u[1] = _mm_unpackhi_epi16(t[10], t[13]); - u[2] = _mm_unpacklo_epi16(t[11], t[12]); - u[3] = _mm_unpackhi_epi16(t[11], t[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16); - v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16); - v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16); - v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - s[10] = _mm_packs_epi32(u[0], u[1]); - s[13] = _mm_packs_epi32(u[2], u[3]); - s[11] = _mm_packs_epi32(u[4], u[5]); - s[12] = _mm_packs_epi32(u[6], u[7]); - s[14] = t[14]; - s[15] = t[15]; - - // stage 7 - in[0] = _mm_add_epi16(s[0], s[15]); - in[1] = _mm_add_epi16(s[1], s[14]); - in[2] = _mm_add_epi16(s[2], s[13]); - in[3] = _mm_add_epi16(s[3], s[12]); - in[4] = _mm_add_epi16(s[4], s[11]); - in[5] = _mm_add_epi16(s[5], s[10]); - in[6] = _mm_add_epi16(s[6], s[9]); - in[7] = _mm_add_epi16(s[7], s[8]); - in[8] = _mm_sub_epi16(s[7], s[8]); - in[9] = _mm_sub_epi16(s[6], s[9]); - in[10] = _mm_sub_epi16(s[5], s[10]); - in[11] = _mm_sub_epi16(s[4], s[11]); - in[12] = _mm_sub_epi16(s[3], s[12]); - in[13] = _mm_sub_epi16(s[2], s[13]); - in[14] = _mm_sub_epi16(s[1], s[14]); - in[15] = _mm_sub_epi16(s[0], s[15]); -} - -void idct16_sse2(__m128i *in0, __m128i *in1) { - array_transpose_16x16(in0, in1); - idct16_8col(in0); - idct16_8col(in1); -} - -void iadst16_sse2(__m128i *in0, __m128i *in1) { - array_transpose_16x16(in0, in1); - iadst16_8col(in0); - iadst16_8col(in1); -} - -void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 5); - const __m128i zero = _mm_setzero_si128(); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - __m128i in[16], l[16]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - // First 1-D inverse DCT - // Load input data. - in[0] = load_input_data(input); - in[1] = load_input_data(input + 8 * 2); - in[2] = load_input_data(input + 8 * 4); - in[3] = load_input_data(input + 8 * 6); - - TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); - - // Stage2 - { - const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero); - const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]); - - tmp0 = _mm_madd_epi16(lo_1_15, stg2_0); - tmp2 = _mm_madd_epi16(lo_1_15, stg2_1); - tmp5 = _mm_madd_epi16(lo_13_3, stg2_6); - tmp7 = _mm_madd_epi16(lo_13_3, stg2_7); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp5 = _mm_add_epi32(tmp5, rounding); - tmp7 = _mm_add_epi32(tmp7, rounding); - - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - - stp2_8 = _mm_packs_epi32(tmp0, tmp2); - stp2_11 = _mm_packs_epi32(tmp5, tmp7); - } - - // Stage3 - { - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], zero); - - tmp0 = _mm_madd_epi16(lo_2_14, stg3_0); - tmp2 = _mm_madd_epi16(lo_2_14, stg3_1); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - - stp1_13 = _mm_unpackhi_epi64(stp2_11, zero); - stp1_14 = _mm_unpackhi_epi64(stp2_8, zero); - - stp1_4 = _mm_packs_epi32(tmp0, tmp2); - } - - // Stage4 - { - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13); - - tmp0 = _mm_madd_epi16(lo_0_8, stg4_0); - tmp2 = _mm_madd_epi16(lo_0_8, stg4_1); - tmp1 = _mm_madd_epi16(lo_9_14, stg4_4); - tmp3 = _mm_madd_epi16(lo_9_14, stg4_5); - tmp5 = _mm_madd_epi16(lo_10_13, stg4_6); - tmp7 = _mm_madd_epi16(lo_10_13, stg4_7); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp5 = _mm_add_epi32(tmp5, rounding); - tmp7 = _mm_add_epi32(tmp7, rounding); - - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - - stp1_0 = _mm_packs_epi32(tmp0, tmp0); - stp1_1 = _mm_packs_epi32(tmp2, tmp2); - stp2_9 = _mm_packs_epi32(tmp1, tmp3); - stp2_10 = _mm_packs_epi32(tmp5, tmp7); - - stp2_6 = _mm_unpackhi_epi64(stp1_4, zero); - } - - // Stage5 and Stage6 - { - tmp0 = _mm_add_epi16(stp2_8, stp2_11); - tmp1 = _mm_sub_epi16(stp2_8, stp2_11); - tmp2 = _mm_add_epi16(stp2_9, stp2_10); - tmp3 = _mm_sub_epi16(stp2_9, stp2_10); - - stp1_9 = _mm_unpacklo_epi64(tmp2, zero); - stp1_10 = _mm_unpacklo_epi64(tmp3, zero); - stp1_8 = _mm_unpacklo_epi64(tmp0, zero); - stp1_11 = _mm_unpacklo_epi64(tmp1, zero); - - stp1_13 = _mm_unpackhi_epi64(tmp3, zero); - stp1_14 = _mm_unpackhi_epi64(tmp2, zero); - stp1_12 = _mm_unpackhi_epi64(tmp1, zero); - stp1_15 = _mm_unpackhi_epi64(tmp0, zero); - } - - // Stage6 - { - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp1_4); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); - - tmp1 = _mm_madd_epi16(lo_6_5, stg4_1); - tmp3 = _mm_madd_epi16(lo_6_5, stg4_0); - tmp0 = _mm_madd_epi16(lo_10_13, stg6_0); - tmp2 = _mm_madd_epi16(lo_10_13, stg4_0); - tmp4 = _mm_madd_epi16(lo_11_12, stg6_0); - tmp6 = _mm_madd_epi16(lo_11_12, stg4_0); - - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp1_6 = _mm_packs_epi32(tmp3, tmp1); - - stp2_10 = _mm_packs_epi32(tmp0, zero); - stp2_13 = _mm_packs_epi32(tmp2, zero); - stp2_11 = _mm_packs_epi32(tmp4, zero); - stp2_12 = _mm_packs_epi32(tmp6, zero); - - tmp0 = _mm_add_epi16(stp1_0, stp1_4); - tmp1 = _mm_sub_epi16(stp1_0, stp1_4); - tmp2 = _mm_add_epi16(stp1_1, stp1_6); - tmp3 = _mm_sub_epi16(stp1_1, stp1_6); - - stp2_0 = _mm_unpackhi_epi64(tmp0, zero); - stp2_1 = _mm_unpacklo_epi64(tmp2, zero); - stp2_2 = _mm_unpackhi_epi64(tmp2, zero); - stp2_3 = _mm_unpacklo_epi64(tmp0, zero); - stp2_4 = _mm_unpacklo_epi64(tmp1, zero); - stp2_5 = _mm_unpackhi_epi64(tmp3, zero); - stp2_6 = _mm_unpacklo_epi64(tmp3, zero); - stp2_7 = _mm_unpackhi_epi64(tmp1, zero); - } - - // Stage7. Left 8x16 only. - l[0] = _mm_add_epi16(stp2_0, stp1_15); - l[1] = _mm_add_epi16(stp2_1, stp1_14); - l[2] = _mm_add_epi16(stp2_2, stp2_13); - l[3] = _mm_add_epi16(stp2_3, stp2_12); - l[4] = _mm_add_epi16(stp2_4, stp2_11); - l[5] = _mm_add_epi16(stp2_5, stp2_10); - l[6] = _mm_add_epi16(stp2_6, stp1_9); - l[7] = _mm_add_epi16(stp2_7, stp1_8); - l[8] = _mm_sub_epi16(stp2_7, stp1_8); - l[9] = _mm_sub_epi16(stp2_6, stp1_9); - l[10] = _mm_sub_epi16(stp2_5, stp2_10); - l[11] = _mm_sub_epi16(stp2_4, stp2_11); - l[12] = _mm_sub_epi16(stp2_3, stp2_12); - l[13] = _mm_sub_epi16(stp2_2, stp2_13); - l[14] = _mm_sub_epi16(stp2_1, stp1_14); - l[15] = _mm_sub_epi16(stp2_0, stp1_15); - - // Second 1-D inverse transform, performed per 8x16 block - for (i = 0; i < 2; i++) { - int j; - array_transpose_4X8(l + 8 * i, in); - - IDCT16_10 - - // Stage7 - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = _mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); - - for (j = 0; j < 16; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -#define LOAD_DQCOEFF(reg, input) \ - { \ - reg = load_input_data(input); \ - input += 8; \ - } \ - -#define IDCT32_34 \ -/* Stage1 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \ - const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \ - \ - const __m128i lo_25_7= _mm_unpacklo_epi16(zero, in[7]); \ - const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]); \ - \ - const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero); \ - const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero); \ - \ - const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]); \ - const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]); \ - \ - MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, \ - stg1_1, stp1_16, stp1_31); \ - MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, \ - stg1_7, stp1_19, stp1_28); \ - MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, \ - stg1_9, stp1_20, stp1_27); \ - MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, \ - stg1_15, stp1_23, stp1_24); \ -} \ -\ -/* Stage2 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \ - const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \ - \ - const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]); \ - const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]); \ - \ - MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, \ - stg2_1, stp2_8, stp2_15); \ - MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, \ - stg2_7, stp2_11, stp2_12); \ - \ - stp2_16 = stp1_16; \ - stp2_19 = stp1_19; \ - \ - stp2_20 = stp1_20; \ - stp2_23 = stp1_23; \ - \ - stp2_24 = stp1_24; \ - stp2_27 = stp1_27; \ - \ - stp2_28 = stp1_28; \ - stp2_31 = stp1_31; \ -} \ -\ -/* Stage3 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \ - const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \ - \ - const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31); \ - const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27); \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp2_24); \ - \ - MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, \ - stg3_1, stp1_4, stp1_7); \ - \ - stp1_8 = stp2_8; \ - stp1_11 = stp2_11; \ - stp1_12 = stp2_12; \ - stp1_15 = stp2_15; \ - \ - MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ - stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ - stp1_18, stp1_29) \ - MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ - stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ - stp1_22, stp1_25) \ - \ - stp1_16 = stp2_16; \ - stp1_31 = stp2_31; \ - stp1_19 = stp2_19; \ - stp1_20 = stp2_20; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_27 = stp2_27; \ - stp1_28 = stp2_28; \ -} \ -\ -/* Stage4 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \ - const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12); \ - \ - MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, \ - stg4_1, stp2_0, stp2_1); \ - \ - stp2_4 = stp1_4; \ - stp2_5 = stp1_4; \ - stp2_6 = stp1_7; \ - stp2_7 = stp1_7; \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ - stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ - stp2_10, stp2_13) \ - \ - stp2_8 = stp1_8; \ - stp2_15 = stp1_15; \ - stp2_11 = stp1_11; \ - stp2_12 = stp1_12; \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ - stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ - stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ - stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ - stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ - stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ - stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ - stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ - stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ -} \ -\ -/* Stage5 */ \ -{ \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ - const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - stp1_0 = stp2_0; \ - stp1_1 = stp2_1; \ - stp1_2 = stp2_1; \ - stp1_3 = stp2_0; \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_4 = stp2_4; \ - stp1_7 = stp2_7; \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - \ - MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ - stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ - stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - \ - stp1_22 = stp2_22; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_25 = stp2_25; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} \ -\ -/* Stage6 */ \ -{ \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ - \ - stp2_8 = stp1_8; \ - stp2_9 = stp1_9; \ - stp2_14 = stp1_14; \ - stp2_15 = stp1_15; \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ - stp2_13, stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ - stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ - stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ - stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ - \ - stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ - stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ - stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ - stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ -} \ -\ -/* Stage7 */ \ -{ \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ - const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ - stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ - stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ - stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ - stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ - stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ - stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ - stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ - stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ - stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - stp1_18 = stp2_18; \ - stp1_19 = stp2_19; \ - \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ - \ - stp1_28 = stp2_28; \ - stp1_29 = stp2_29; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} - - -#define IDCT32 \ -/* Stage1 */ \ -{ \ - const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \ - const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]); \ - const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]); \ - const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]); \ - \ - const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]); \ - const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]); \ - const __m128i lo_25_7= _mm_unpacklo_epi16(in[25], in[7]); \ - const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]); \ - \ - const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]); \ - const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]); \ - const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]); \ - const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]); \ - \ - const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]); \ - const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]); \ - const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]); \ - const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, \ - stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, \ - stp1_17, stp1_30) \ - MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, \ - stg1_5, stg1_6, stg1_7, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, \ - stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, \ - stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ -} \ -\ -/* Stage2 */ \ -{ \ - const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], in[30]); \ - const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]); \ - const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]); \ - const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]); \ - \ - const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]); \ - const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]); \ - const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]); \ - const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]); \ - \ - MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, \ - stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, \ - stp2_14) \ - MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, \ - stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, \ - stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_17); \ - stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); \ - stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_18); \ - \ - stp2_20 = _mm_add_epi16(stp1_20, stp1_21); \ - stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_22); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_25); \ - stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); \ - stp2_27 = _mm_add_epi16(stp1_27, stp1_26); \ - \ - stp2_28 = _mm_add_epi16(stp1_28, stp1_29); \ - stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); \ - stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_31, stp1_30); \ -} \ -\ -/* Stage3 */ \ -{ \ - const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]); \ - const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]); \ - const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]); \ - const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]); \ - \ - const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); \ - const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - \ - MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, \ - stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, \ - stp1_6) \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_9); \ - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ - stp1_12 = _mm_add_epi16(stp2_12, stp2_13); \ - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ - \ - MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ - stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ - stp1_18, stp1_29) \ - MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ - stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ - stp1_22, stp1_25) \ - \ - stp1_16 = stp2_16; \ - stp1_31 = stp2_31; \ - stp1_19 = stp2_19; \ - stp1_20 = stp2_20; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_27 = stp2_27; \ - stp1_28 = stp2_28; \ -} \ -\ -/* Stage4 */ \ -{ \ - const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]); \ - const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]); \ - const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]); \ - const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, \ - stg4_1, stg4_2, stg4_3, stp2_0, stp2_1, \ - stp2_2, stp2_3) \ - \ - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ - stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ - stp2_10, stp2_13) \ - \ - stp2_8 = stp1_8; \ - stp2_15 = stp1_15; \ - stp2_11 = stp1_11; \ - stp2_12 = stp1_12; \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ - stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ - stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ - stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ - stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ - stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ - stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ - stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ - stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ -} \ -\ -/* Stage5 */ \ -{ \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ - const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_4 = stp2_4; \ - stp1_7 = stp2_7; \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - \ - MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ - stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ - stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - \ - stp1_22 = stp2_22; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_25 = stp2_25; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} \ -\ -/* Stage6 */ \ -{ \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ - \ - stp2_8 = stp1_8; \ - stp2_9 = stp1_9; \ - stp2_14 = stp1_14; \ - stp2_15 = stp1_15; \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ - stp2_13, stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ - stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ - stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ - stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ - \ - stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ - stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ - stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ - stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ -} \ -\ -/* Stage7 */ \ -{ \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ - const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ - stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ - stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ - stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ - stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ - stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ - stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ - stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ - stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ - stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - stp1_18 = stp2_18; \ - stp1_19 = stp2_19; \ - \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ - \ - stp1_28 = stp2_28; \ - stp1_29 = stp2_29; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} - -// Only upper-left 8x8 has non-zero coeff -void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<5); - - // idct constants for each stage - const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); - const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); - const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); - const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); - const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); - const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); - const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in[32], col[32]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, - stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, - stp1_30, stp1_31; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, - stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, - stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, - stp2_30, stp2_31; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - - // Load input data. Only need to load the top left 8x8 block. - in[0] = load_input_data(input); - in[1] = load_input_data(input + 32); - in[2] = load_input_data(input + 64); - in[3] = load_input_data(input + 96); - in[4] = load_input_data(input + 128); - in[5] = load_input_data(input + 160); - in[6] = load_input_data(input + 192); - in[7] = load_input_data(input + 224); - - for (i = 8; i < 32; ++i) { - in[i] = _mm_setzero_si128(); - } - - array_transpose_8x8(in, in); - // TODO(hkuang): Following transposes are unnecessary. But remove them will - // lead to performance drop on some devices. - array_transpose_8x8(in + 8, in + 8); - array_transpose_8x8(in + 16, in + 16); - array_transpose_8x8(in + 24, in + 24); - - IDCT32_34 - - // 1_D: Store 32 intermediate results for each 8x32 block. - col[0] = _mm_add_epi16(stp1_0, stp1_31); - col[1] = _mm_add_epi16(stp1_1, stp1_30); - col[2] = _mm_add_epi16(stp1_2, stp1_29); - col[3] = _mm_add_epi16(stp1_3, stp1_28); - col[4] = _mm_add_epi16(stp1_4, stp1_27); - col[5] = _mm_add_epi16(stp1_5, stp1_26); - col[6] = _mm_add_epi16(stp1_6, stp1_25); - col[7] = _mm_add_epi16(stp1_7, stp1_24); - col[8] = _mm_add_epi16(stp1_8, stp1_23); - col[9] = _mm_add_epi16(stp1_9, stp1_22); - col[10] = _mm_add_epi16(stp1_10, stp1_21); - col[11] = _mm_add_epi16(stp1_11, stp1_20); - col[12] = _mm_add_epi16(stp1_12, stp1_19); - col[13] = _mm_add_epi16(stp1_13, stp1_18); - col[14] = _mm_add_epi16(stp1_14, stp1_17); - col[15] = _mm_add_epi16(stp1_15, stp1_16); - col[16] = _mm_sub_epi16(stp1_15, stp1_16); - col[17] = _mm_sub_epi16(stp1_14, stp1_17); - col[18] = _mm_sub_epi16(stp1_13, stp1_18); - col[19] = _mm_sub_epi16(stp1_12, stp1_19); - col[20] = _mm_sub_epi16(stp1_11, stp1_20); - col[21] = _mm_sub_epi16(stp1_10, stp1_21); - col[22] = _mm_sub_epi16(stp1_9, stp1_22); - col[23] = _mm_sub_epi16(stp1_8, stp1_23); - col[24] = _mm_sub_epi16(stp1_7, stp1_24); - col[25] = _mm_sub_epi16(stp1_6, stp1_25); - col[26] = _mm_sub_epi16(stp1_5, stp1_26); - col[27] = _mm_sub_epi16(stp1_4, stp1_27); - col[28] = _mm_sub_epi16(stp1_3, stp1_28); - col[29] = _mm_sub_epi16(stp1_2, stp1_29); - col[30] = _mm_sub_epi16(stp1_1, stp1_30); - col[31] = _mm_sub_epi16(stp1_0, stp1_31); - for (i = 0; i < 4; i++) { - int j; - const __m128i zero = _mm_setzero_si128(); - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(col + i * 8, in); - IDCT32_34 - - // 2_D: Calculate the results and store them to destination. - in[0] = _mm_add_epi16(stp1_0, stp1_31); - in[1] = _mm_add_epi16(stp1_1, stp1_30); - in[2] = _mm_add_epi16(stp1_2, stp1_29); - in[3] = _mm_add_epi16(stp1_3, stp1_28); - in[4] = _mm_add_epi16(stp1_4, stp1_27); - in[5] = _mm_add_epi16(stp1_5, stp1_26); - in[6] = _mm_add_epi16(stp1_6, stp1_25); - in[7] = _mm_add_epi16(stp1_7, stp1_24); - in[8] = _mm_add_epi16(stp1_8, stp1_23); - in[9] = _mm_add_epi16(stp1_9, stp1_22); - in[10] = _mm_add_epi16(stp1_10, stp1_21); - in[11] = _mm_add_epi16(stp1_11, stp1_20); - in[12] = _mm_add_epi16(stp1_12, stp1_19); - in[13] = _mm_add_epi16(stp1_13, stp1_18); - in[14] = _mm_add_epi16(stp1_14, stp1_17); - in[15] = _mm_add_epi16(stp1_15, stp1_16); - in[16] = _mm_sub_epi16(stp1_15, stp1_16); - in[17] = _mm_sub_epi16(stp1_14, stp1_17); - in[18] = _mm_sub_epi16(stp1_13, stp1_18); - in[19] = _mm_sub_epi16(stp1_12, stp1_19); - in[20] = _mm_sub_epi16(stp1_11, stp1_20); - in[21] = _mm_sub_epi16(stp1_10, stp1_21); - in[22] = _mm_sub_epi16(stp1_9, stp1_22); - in[23] = _mm_sub_epi16(stp1_8, stp1_23); - in[24] = _mm_sub_epi16(stp1_7, stp1_24); - in[25] = _mm_sub_epi16(stp1_6, stp1_25); - in[26] = _mm_sub_epi16(stp1_5, stp1_26); - in[27] = _mm_sub_epi16(stp1_4, stp1_27); - in[28] = _mm_sub_epi16(stp1_3, stp1_28); - in[29] = _mm_sub_epi16(stp1_2, stp1_29); - in[30] = _mm_sub_epi16(stp1_1, stp1_30); - in[31] = _mm_sub_epi16(stp1_0, stp1_31); - - for (j = 0; j < 32; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 5); - const __m128i zero = _mm_setzero_si128(); - - // idct constants for each stage - const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); - const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64); - const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64); - const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64); - const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64); - const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); - const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); - const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); - const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64); - const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64); - const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64); - const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64); - const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); - const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); - const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in[32], col[128], zero_idx[16]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, - stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, - stp1_30, stp1_31; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, - stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, - stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, - stp2_30, stp2_31; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i, j, i32; - - for (i = 0; i < 4; i++) { - i32 = (i << 5); - // First 1-D idct - // Load input data. - LOAD_DQCOEFF(in[0], input); - LOAD_DQCOEFF(in[8], input); - LOAD_DQCOEFF(in[16], input); - LOAD_DQCOEFF(in[24], input); - LOAD_DQCOEFF(in[1], input); - LOAD_DQCOEFF(in[9], input); - LOAD_DQCOEFF(in[17], input); - LOAD_DQCOEFF(in[25], input); - LOAD_DQCOEFF(in[2], input); - LOAD_DQCOEFF(in[10], input); - LOAD_DQCOEFF(in[18], input); - LOAD_DQCOEFF(in[26], input); - LOAD_DQCOEFF(in[3], input); - LOAD_DQCOEFF(in[11], input); - LOAD_DQCOEFF(in[19], input); - LOAD_DQCOEFF(in[27], input); - - LOAD_DQCOEFF(in[4], input); - LOAD_DQCOEFF(in[12], input); - LOAD_DQCOEFF(in[20], input); - LOAD_DQCOEFF(in[28], input); - LOAD_DQCOEFF(in[5], input); - LOAD_DQCOEFF(in[13], input); - LOAD_DQCOEFF(in[21], input); - LOAD_DQCOEFF(in[29], input); - LOAD_DQCOEFF(in[6], input); - LOAD_DQCOEFF(in[14], input); - LOAD_DQCOEFF(in[22], input); - LOAD_DQCOEFF(in[30], input); - LOAD_DQCOEFF(in[7], input); - LOAD_DQCOEFF(in[15], input); - LOAD_DQCOEFF(in[23], input); - LOAD_DQCOEFF(in[31], input); - - // checking if all entries are zero - zero_idx[0] = _mm_or_si128(in[0], in[1]); - zero_idx[1] = _mm_or_si128(in[2], in[3]); - zero_idx[2] = _mm_or_si128(in[4], in[5]); - zero_idx[3] = _mm_or_si128(in[6], in[7]); - zero_idx[4] = _mm_or_si128(in[8], in[9]); - zero_idx[5] = _mm_or_si128(in[10], in[11]); - zero_idx[6] = _mm_or_si128(in[12], in[13]); - zero_idx[7] = _mm_or_si128(in[14], in[15]); - zero_idx[8] = _mm_or_si128(in[16], in[17]); - zero_idx[9] = _mm_or_si128(in[18], in[19]); - zero_idx[10] = _mm_or_si128(in[20], in[21]); - zero_idx[11] = _mm_or_si128(in[22], in[23]); - zero_idx[12] = _mm_or_si128(in[24], in[25]); - zero_idx[13] = _mm_or_si128(in[26], in[27]); - zero_idx[14] = _mm_or_si128(in[28], in[29]); - zero_idx[15] = _mm_or_si128(in[30], in[31]); - - zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); - zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); - zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); - zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); - zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]); - zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); - zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); - zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]); - - zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); - zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); - zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); - zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); - zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); - zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); - zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); - - if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) { - col[i32 + 0] = _mm_setzero_si128(); - col[i32 + 1] = _mm_setzero_si128(); - col[i32 + 2] = _mm_setzero_si128(); - col[i32 + 3] = _mm_setzero_si128(); - col[i32 + 4] = _mm_setzero_si128(); - col[i32 + 5] = _mm_setzero_si128(); - col[i32 + 6] = _mm_setzero_si128(); - col[i32 + 7] = _mm_setzero_si128(); - col[i32 + 8] = _mm_setzero_si128(); - col[i32 + 9] = _mm_setzero_si128(); - col[i32 + 10] = _mm_setzero_si128(); - col[i32 + 11] = _mm_setzero_si128(); - col[i32 + 12] = _mm_setzero_si128(); - col[i32 + 13] = _mm_setzero_si128(); - col[i32 + 14] = _mm_setzero_si128(); - col[i32 + 15] = _mm_setzero_si128(); - col[i32 + 16] = _mm_setzero_si128(); - col[i32 + 17] = _mm_setzero_si128(); - col[i32 + 18] = _mm_setzero_si128(); - col[i32 + 19] = _mm_setzero_si128(); - col[i32 + 20] = _mm_setzero_si128(); - col[i32 + 21] = _mm_setzero_si128(); - col[i32 + 22] = _mm_setzero_si128(); - col[i32 + 23] = _mm_setzero_si128(); - col[i32 + 24] = _mm_setzero_si128(); - col[i32 + 25] = _mm_setzero_si128(); - col[i32 + 26] = _mm_setzero_si128(); - col[i32 + 27] = _mm_setzero_si128(); - col[i32 + 28] = _mm_setzero_si128(); - col[i32 + 29] = _mm_setzero_si128(); - col[i32 + 30] = _mm_setzero_si128(); - col[i32 + 31] = _mm_setzero_si128(); - continue; - } - - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(in, in); - array_transpose_8x8(in + 8, in + 8); - array_transpose_8x8(in + 16, in + 16); - array_transpose_8x8(in + 24, in + 24); - - IDCT32 - - // 1_D: Store 32 intermediate results for each 8x32 block. - col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); - col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); - col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); - col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); - col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); - col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); - col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); - col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); - col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); - col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); - col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); - col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); - col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); - col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); - col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); - col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); - col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); - col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); - col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); - col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); - col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); - col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); - col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); - col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); - col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); - col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); - col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); - col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); - col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); - col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); - col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); - col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); - } - for (i = 0; i < 4; i++) { - // Second 1-D idct - j = i << 3; - - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(col + j, in); - array_transpose_8x8(col + j + 32, in + 8); - array_transpose_8x8(col + j + 64, in + 16); - array_transpose_8x8(col + j + 96, in + 24); - - IDCT32 - - // 2_D: Calculate the results and store them to destination. - in[0] = _mm_add_epi16(stp1_0, stp1_31); - in[1] = _mm_add_epi16(stp1_1, stp1_30); - in[2] = _mm_add_epi16(stp1_2, stp1_29); - in[3] = _mm_add_epi16(stp1_3, stp1_28); - in[4] = _mm_add_epi16(stp1_4, stp1_27); - in[5] = _mm_add_epi16(stp1_5, stp1_26); - in[6] = _mm_add_epi16(stp1_6, stp1_25); - in[7] = _mm_add_epi16(stp1_7, stp1_24); - in[8] = _mm_add_epi16(stp1_8, stp1_23); - in[9] = _mm_add_epi16(stp1_9, stp1_22); - in[10] = _mm_add_epi16(stp1_10, stp1_21); - in[11] = _mm_add_epi16(stp1_11, stp1_20); - in[12] = _mm_add_epi16(stp1_12, stp1_19); - in[13] = _mm_add_epi16(stp1_13, stp1_18); - in[14] = _mm_add_epi16(stp1_14, stp1_17); - in[15] = _mm_add_epi16(stp1_15, stp1_16); - in[16] = _mm_sub_epi16(stp1_15, stp1_16); - in[17] = _mm_sub_epi16(stp1_14, stp1_17); - in[18] = _mm_sub_epi16(stp1_13, stp1_18); - in[19] = _mm_sub_epi16(stp1_12, stp1_19); - in[20] = _mm_sub_epi16(stp1_11, stp1_20); - in[21] = _mm_sub_epi16(stp1_10, stp1_21); - in[22] = _mm_sub_epi16(stp1_9, stp1_22); - in[23] = _mm_sub_epi16(stp1_8, stp1_23); - in[24] = _mm_sub_epi16(stp1_7, stp1_24); - in[25] = _mm_sub_epi16(stp1_6, stp1_25); - in[26] = _mm_sub_epi16(stp1_5, stp1_26); - in[27] = _mm_sub_epi16(stp1_4, stp1_27); - in[28] = _mm_sub_epi16(stp1_3, stp1_28); - in[29] = _mm_sub_epi16(stp1_2, stp1_29); - in[30] = _mm_sub_epi16(stp1_1, stp1_30); - in[31] = _mm_sub_epi16(stp1_0, stp1_31); - - for (j = 0; j < 32; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a, j; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 6); - - dc_value = _mm_set1_epi16(a); - - for (j = 0; j < 32; ++j) { - RECON_AND_STORE(dest + 0 + j * stride, dc_value); - RECON_AND_STORE(dest + 8 + j * stride, dc_value); - RECON_AND_STORE(dest + 16 + j * stride, dc_value); - RECON_AND_STORE(dest + 24 + j * stride, dc_value); - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE __m128i clamp_high_sse2(__m128i value, int bd) { - __m128i ubounded, retval; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi16(1); - const __m128i max = _mm_subs_epi16(_mm_slli_epi16(one, bd), one); - ubounded = _mm_cmpgt_epi16(value, max); - retval = _mm_andnot_si128(ubounded, value); - ubounded = _mm_and_si128(ubounded, max); - retval = _mm_or_si128(retval, ubounded); - retval = _mm_and_si128(retval, _mm_cmpgt_epi16(retval, zero)); - return retval; -} - -void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - int i, j; - __m128i inptr[4]; - __m128i sign_bits[2]; - __m128i temp_mm, min_input, max_input; - int test; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - int optimised_cols = 0; - const __m128i zero = _mm_set1_epi16(0); - const __m128i eight = _mm_set1_epi16(8); - const __m128i max = _mm_set1_epi16(12043); - const __m128i min = _mm_set1_epi16(-12043); - // Load input into __m128i - inptr[0] = _mm_loadu_si128((const __m128i *)input); - inptr[1] = _mm_loadu_si128((const __m128i *)(input + 4)); - inptr[2] = _mm_loadu_si128((const __m128i *)(input + 8)); - inptr[3] = _mm_loadu_si128((const __m128i *)(input + 12)); - - // Pack to 16 bits - inptr[0] = _mm_packs_epi32(inptr[0], inptr[1]); - inptr[1] = _mm_packs_epi32(inptr[2], inptr[3]); - - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp_mm = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp_mm); - - if (!test) { - // Do the row transform - idct4_sse2(inptr); - - // Check the min & max values - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp_mm = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp_mm); - - if (test) { - transpose_4x4(inptr); - sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero); - sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero); - inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]); - inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]); - inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]); - inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]); - _mm_storeu_si128((__m128i *)outptr, inptr[0]); - _mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]); - _mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]); - _mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]); - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 4; ++i) { - vpx_highbd_idct4_c(input, outptr, bd); - input += 4; - outptr += 4; - } - } - - if (optimised_cols) { - idct4_sse2(inptr); - - // Final round and shift - inptr[0] = _mm_add_epi16(inptr[0], eight); - inptr[1] = _mm_add_epi16(inptr[1], eight); - - inptr[0] = _mm_srai_epi16(inptr[0], 4); - inptr[1] = _mm_srai_epi16(inptr[1], 4); - - // Reconstruction and Store - { - __m128i d0 = _mm_loadl_epi64((const __m128i *)dest); - __m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi64( - d0, _mm_loadl_epi64((const __m128i *)(dest + stride))); - d2 = _mm_unpacklo_epi64( - d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3))); - d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd); - d2 = clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd); - // store input0 - _mm_storel_epi64((__m128i *)dest, d0); - // store input1 - d0 = _mm_srli_si128(d0, 8); - _mm_storel_epi64((__m128i *)(dest + stride), d0); - // store input2 - _mm_storel_epi64((__m128i *)(dest + stride * 2), d2); - // store input3 - d2 = _mm_srli_si128(d2, 8); - _mm_storel_epi64((__m128i *)(dest + stride * 3), d2); - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[4], temp_out[4]; - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - vpx_highbd_idct4_c(temp_in, temp_out, bd); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); - } - } - } -} - -void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[8]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i sixteen = _mm_set1_epi16(16); - const __m128i max = _mm_set1_epi16(6201); - const __m128i min = _mm_set1_epi16(-6201); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 8; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 8; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform - idct8_sse2(inptr); - - // Find the min & max for the column transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 8; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - array_transpose_8x8(inptr, inptr); - for (i = 0; i < 8; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); - temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 8; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - } - - if (optimised_cols) { - idct8_sse2(inptr); - - // Final round & shift and Reconstruction and Store - { - __m128i d[8]; - for (i = 0; i < 8; i++) { - inptr[i] = _mm_add_epi16(inptr[i], sixteen); - d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - inptr[i] = _mm_srai_epi16(inptr[i], 5); - d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[8], temp_out[8]; - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } - } -} - -void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8] = { 0 }; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[8]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i sixteen = _mm_set1_epi16(16); - const __m128i max = _mm_set1_epi16(6201); - const __m128i min = _mm_set1_epi16(-6201); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 8; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - // only first 4 row has non-zero coefs - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 4; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform - idct8_sse2(inptr); - - // Find the min & max for the column transform - // N.B. Only first 4 cols contain non-zero coeffs - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 8; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - // Use fact only first 4 rows contain non-zero coeffs - array_transpose_4X8(inptr, inptr); - for (i = 0; i < 4; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); - temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 4; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - } - - if (optimised_cols) { - idct8_sse2(inptr); - - // Final round & shift and Reconstruction and Store - { - __m128i d[8]; - for (i = 0; i < 8; i++) { - inptr[i] = _mm_add_epi16(inptr[i], sixteen); - d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - inptr[i] = _mm_srai_epi16(inptr[i], 5); - d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[8], temp_out[8]; - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } - } -} - -void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[32]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i rounding = _mm_set1_epi16(32); - const __m128i max = _mm_set1_epi16(3155); - const __m128i min = _mm_set1_epi16(-3155); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 16; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); - inptr[i + 16] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 32; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform - idct16_sse2(inptr, inptr + 16); - - // Find the min & max for the column transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 32; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - array_transpose_16x16(inptr, inptr + 16); - for (i = 0; i < 16; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); - sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); - temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 16; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - } - - if (optimised_cols) { - idct16_sse2(inptr, inptr + 16); - - // Final round & shift and Reconstruction and Store - { - __m128i d[2]; - for (i = 0; i < 16; i++) { - inptr[i ] = _mm_add_epi16(inptr[i ], rounding); - inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); - d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); - inptr[i ] = _mm_srai_epi16(inptr[i ], 6); - inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); - d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); - d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); - _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[16], temp_out[16]; - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } - } -} - -void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16] = { 0 }; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[32]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i rounding = _mm_set1_epi16(32); - const __m128i max = _mm_set1_epi16(3155); - const __m128i min = _mm_set1_epi16(-3155); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 16; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); - inptr[i + 16] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - // Since all non-zero dct coefficients are in upper-left 4x4 area, - // we only need to consider first 4 rows here. - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 4; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform (N.B. This transposes inptr) - idct16_sse2(inptr, inptr + 16); - - // Find the min & max for the column transform - // N.B. Only first 4 cols contain non-zero coeffs - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 16; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - // Use fact only first 4 rows contain non-zero coeffs - array_transpose_8x8(inptr, inptr); - array_transpose_8x8(inptr + 8, inptr + 16); - for (i = 0; i < 4; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); - sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); - temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 4; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - } - - if (optimised_cols) { - idct16_sse2(inptr, inptr + 16); - - // Final round & shift and Reconstruction and Store - { - __m128i d[2]; - for (i = 0; i < 16; i++) { - inptr[i ] = _mm_add_epi16(inptr[i ], rounding); - inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); - d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); - inptr[i ] = _mm_srai_epi16(inptr[i ], 6); - inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); - d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); - d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); - _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[16], temp_out[16]; - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h deleted file mode 100644 index bd520c18e5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_ -#define VPX_DSP_X86_INV_TXFM_SSE2_H_ - -#include // SSE2 -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/inv_txfm.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" - -// perform 8x8 transpose -static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { - const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); - const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); - const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]); - const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]); - const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); - const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); - const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]); - const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]); - - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3); - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3); - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); - - res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1); - res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1); - res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3); - res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3); - res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5); - res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5); - res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7); - res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7); -} - -#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - \ - in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ - in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ - } - -static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { - const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); - const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); - const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); - const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); - - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); - - out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4); - out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4); - out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6); - out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); -} - -static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { - __m128i tbuf[8]; - array_transpose_8x8(res0, res0); - array_transpose_8x8(res1, tbuf); - array_transpose_8x8(res0 + 8, res1); - array_transpose_8x8(res1 + 8, res1 + 8); - - res0[8] = tbuf[0]; - res0[9] = tbuf[1]; - res0[10] = tbuf[2]; - res0[11] = tbuf[3]; - res0[12] = tbuf[4]; - res0[13] = tbuf[5]; - res0[14] = tbuf[6]; - res0[15] = tbuf[7]; -} - -// Function to allow 8 bit optimisations to be used when profile 0 is used with -// highbitdepth enabled -static INLINE __m128i load_input_data(const tran_low_t *data) { -#if CONFIG_VP9_HIGHBITDEPTH - return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5], - data[6], data[7]); -#else - return _mm_load_si128((const __m128i *)data); -#endif -} - -static INLINE void load_buffer_8x16(const tran_low_t *input, __m128i *in) { - in[0] = load_input_data(input + 0 * 16); - in[1] = load_input_data(input + 1 * 16); - in[2] = load_input_data(input + 2 * 16); - in[3] = load_input_data(input + 3 * 16); - in[4] = load_input_data(input + 4 * 16); - in[5] = load_input_data(input + 5 * 16); - in[6] = load_input_data(input + 6 * 16); - in[7] = load_input_data(input + 7 * 16); - - in[8] = load_input_data(input + 8 * 16); - in[9] = load_input_data(input + 9 * 16); - in[10] = load_input_data(input + 10 * 16); - in[11] = load_input_data(input + 11 * 16); - in[12] = load_input_data(input + 12 * 16); - in[13] = load_input_data(input + 13 * 16); - in[14] = load_input_data(input + 14 * 16); - in[15] = load_input_data(input + 15 * 16); -} - -#define RECON_AND_STORE(dest, in_x) \ - { \ - __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - _mm_storel_epi64((__m128i *)(dest), d0); \ - } - -static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { - const __m128i final_rounding = _mm_set1_epi16(1<<5); - const __m128i zero = _mm_setzero_si128(); - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = _mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest + 0 * stride, in[0]); - RECON_AND_STORE(dest + 1 * stride, in[1]); - RECON_AND_STORE(dest + 2 * stride, in[2]); - RECON_AND_STORE(dest + 3 * stride, in[3]); - RECON_AND_STORE(dest + 4 * stride, in[4]); - RECON_AND_STORE(dest + 5 * stride, in[5]); - RECON_AND_STORE(dest + 6 * stride, in[6]); - RECON_AND_STORE(dest + 7 * stride, in[7]); - RECON_AND_STORE(dest + 8 * stride, in[8]); - RECON_AND_STORE(dest + 9 * stride, in[9]); - RECON_AND_STORE(dest + 10 * stride, in[10]); - RECON_AND_STORE(dest + 11 * stride, in[11]); - RECON_AND_STORE(dest + 12 * stride, in[12]); - RECON_AND_STORE(dest + 13 * stride, in[13]); - RECON_AND_STORE(dest + 14 * stride, in[14]); - RECON_AND_STORE(dest + 15 * stride, in[15]); -} - -void idct4_sse2(__m128i *in); -void idct8_sse2(__m128i *in); -void idct16_sse2(__m128i *in0, __m128i *in1); -void iadst4_sse2(__m128i *in); -void iadst8_sse2(__m128i *in); -void iadst16_sse2(__m128i *in0, __m128i *in1); - -#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm deleted file mode 100644 index 20baf820f6..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm +++ /dev/null @@ -1,1793 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -; This file provides SSSE3 version of the inverse transformation. Part -; of the functions are originally derived from the ffmpeg project. -; Note that the current version applies to x86 64-bit only. - -SECTION_RODATA - -pw_11585x2: times 8 dw 23170 - -pw_m2404x2: times 8 dw -2404*2 -pw_m4756x2: times 8 dw -4756*2 -pw_m5520x2: times 8 dw -5520*2 -pw_m8423x2: times 8 dw -8423*2 -pw_m9102x2: times 8 dw -9102*2 -pw_m10394x2: times 8 dw -10394*2 -pw_m11003x2: times 8 dw -11003*2 - -pw_16364x2: times 8 dw 16364*2 -pw_16305x2: times 8 dw 16305*2 -pw_16207x2: times 8 dw 16207*2 -pw_16069x2: times 8 dw 16069*2 -pw_15893x2: times 8 dw 15893*2 -pw_15679x2: times 8 dw 15679*2 -pw_15426x2: times 8 dw 15426*2 -pw_15137x2: times 8 dw 15137*2 -pw_14811x2: times 8 dw 14811*2 -pw_14449x2: times 8 dw 14449*2 -pw_14053x2: times 8 dw 14053*2 -pw_13623x2: times 8 dw 13623*2 -pw_13160x2: times 8 dw 13160*2 -pw_12665x2: times 8 dw 12665*2 -pw_12140x2: times 8 dw 12140*2 -pw__9760x2: times 8 dw 9760*2 -pw__7723x2: times 8 dw 7723*2 -pw__7005x2: times 8 dw 7005*2 -pw__6270x2: times 8 dw 6270*2 -pw__3981x2: times 8 dw 3981*2 -pw__3196x2: times 8 dw 3196*2 -pw__1606x2: times 8 dw 1606*2 -pw___804x2: times 8 dw 804*2 - -pd_8192: times 4 dd 8192 -pw_32: times 8 dw 32 -pw_16: times 8 dw 16 - -%macro TRANSFORM_COEFFS 2 -pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 -pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1 -pw_m%1_m%2: dw -%1, -%2, -%1, -%2, -%1, -%2, -%1, -%2 -%endmacro - -TRANSFORM_COEFFS 6270, 15137 -TRANSFORM_COEFFS 3196, 16069 -TRANSFORM_COEFFS 13623, 9102 - -; constants for 32x32_34 -TRANSFORM_COEFFS 804, 16364 -TRANSFORM_COEFFS 15426, 5520 -TRANSFORM_COEFFS 3981, 15893 -TRANSFORM_COEFFS 16207, 2404 -TRANSFORM_COEFFS 1606, 16305 -TRANSFORM_COEFFS 15679, 4756 -TRANSFORM_COEFFS 11585, 11585 - -; constants for 32x32_1024 -TRANSFORM_COEFFS 12140, 11003 -TRANSFORM_COEFFS 7005, 14811 -TRANSFORM_COEFFS 14053, 8423 -TRANSFORM_COEFFS 9760, 13160 -TRANSFORM_COEFFS 12665, 10394 -TRANSFORM_COEFFS 7723, 14449 - -%macro PAIR_PP_COEFFS 2 -dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2 -%endmacro - -%macro PAIR_MP_COEFFS 2 -dpw_m%1_%2: dw -%1, -%1, -%1, -%1, %2, %2, %2, %2 -%endmacro - -%macro PAIR_MM_COEFFS 2 -dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2 -%endmacro - -PAIR_PP_COEFFS 30274, 12540 -PAIR_PP_COEFFS 6392, 32138 -PAIR_MP_COEFFS 18204, 27246 - -PAIR_PP_COEFFS 12540, 12540 -PAIR_PP_COEFFS 30274, 30274 -PAIR_PP_COEFFS 6392, 6392 -PAIR_PP_COEFFS 32138, 32138 -PAIR_MM_COEFFS 18204, 18204 -PAIR_PP_COEFFS 27246, 27246 - -SECTION .text - -%if ARCH_X86_64 -%macro SUM_SUB 3 - psubw m%3, m%1, m%2 - paddw m%1, m%2 - SWAP %2, %3 -%endmacro - -; butterfly operation -%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2 - pmaddwd m%1, m%3, %5 - pmaddwd m%2, m%3, %6 - paddd m%1, %4 - paddd m%2, %4 - psrad m%1, 14 - psrad m%2, 14 -%endmacro - -%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 - punpckhwd m%6, m%2, m%1 - MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_%3_%4] - punpcklwd m%2, m%1 - MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_%3_%4] - packssdw m%1, m%7 - packssdw m%2, m%6 -%endmacro - -%macro BUTTERFLY_4Xmm 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 - punpckhwd m%6, m%2, m%1 - MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_m%3_m%4] - punpcklwd m%2, m%1 - MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_m%3_m%4] - packssdw m%1, m%7 - packssdw m%2, m%6 -%endmacro - -; matrix transpose -%macro INTERLEAVE_2X 4 - punpckh%1 m%4, m%2, m%3 - punpckl%1 m%2, m%3 - SWAP %3, %4 -%endmacro - -%macro TRANSPOSE8X8 9 - INTERLEAVE_2X wd, %1, %2, %9 - INTERLEAVE_2X wd, %3, %4, %9 - INTERLEAVE_2X wd, %5, %6, %9 - INTERLEAVE_2X wd, %7, %8, %9 - - INTERLEAVE_2X dq, %1, %3, %9 - INTERLEAVE_2X dq, %2, %4, %9 - INTERLEAVE_2X dq, %5, %7, %9 - INTERLEAVE_2X dq, %6, %8, %9 - - INTERLEAVE_2X qdq, %1, %5, %9 - INTERLEAVE_2X qdq, %3, %7, %9 - INTERLEAVE_2X qdq, %2, %6, %9 - INTERLEAVE_2X qdq, %4, %8, %9 - - SWAP %2, %5 - SWAP %4, %7 -%endmacro - -%macro IDCT8_1D 0 - SUM_SUB 0, 4, 9 - BUTTERFLY_4X 2, 6, 6270, 15137, m8, 9, 10 - pmulhrsw m0, m12 - pmulhrsw m4, m12 - BUTTERFLY_4X 1, 7, 3196, 16069, m8, 9, 10 - BUTTERFLY_4X 5, 3, 13623, 9102, m8, 9, 10 - - SUM_SUB 1, 5, 9 - SUM_SUB 7, 3, 9 - SUM_SUB 0, 6, 9 - SUM_SUB 4, 2, 9 - SUM_SUB 3, 5, 9 - pmulhrsw m3, m12 - pmulhrsw m5, m12 - - SUM_SUB 0, 7, 9 - SUM_SUB 4, 3, 9 - SUM_SUB 2, 5, 9 - SUM_SUB 6, 1, 9 - - SWAP 3, 6 - SWAP 1, 4 -%endmacro - -; This macro handles 8 pixels per line -%macro ADD_STORE_8P_2X 5; src1, src2, tmp1, tmp2, zero - paddw m%1, m11 - paddw m%2, m11 - psraw m%1, 5 - psraw m%2, 5 - - movh m%3, [outputq] - movh m%4, [outputq + strideq] - punpcklbw m%3, m%5 - punpcklbw m%4, m%5 - paddw m%3, m%1 - paddw m%4, m%2 - packuswb m%3, m%5 - packuswb m%4, m%5 - movh [outputq], m%3 - movh [outputq + strideq], m%4 -%endmacro - -INIT_XMM ssse3 -; full inverse 8x8 2D-DCT transform -cglobal idct8x8_64_add, 3, 5, 13, input, output, stride - mova m8, [pd_8192] - mova m11, [pw_16] - mova m12, [pw_11585x2] - - lea r3, [2 * strideq] -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [inputq + 0] - packssdw m0, [inputq + 16] - mova m1, [inputq + 32] - packssdw m1, [inputq + 48] - mova m2, [inputq + 64] - packssdw m2, [inputq + 80] - mova m3, [inputq + 96] - packssdw m3, [inputq + 112] - mova m4, [inputq + 128] - packssdw m4, [inputq + 144] - mova m5, [inputq + 160] - packssdw m5, [inputq + 176] - mova m6, [inputq + 192] - packssdw m6, [inputq + 208] - mova m7, [inputq + 224] - packssdw m7, [inputq + 240] -%else - mova m0, [inputq + 0] - mova m1, [inputq + 16] - mova m2, [inputq + 32] - mova m3, [inputq + 48] - mova m4, [inputq + 64] - mova m5, [inputq + 80] - mova m6, [inputq + 96] - mova m7, [inputq + 112] -%endif - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - IDCT8_1D - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - IDCT8_1D - - pxor m12, m12 - ADD_STORE_8P_2X 0, 1, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 2, 3, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 4, 5, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 6, 7, 9, 10, 12 - - RET - -; inverse 8x8 2D-DCT transform with only first 10 coeffs non-zero -cglobal idct8x8_12_add, 3, 5, 13, input, output, stride - mova m8, [pd_8192] - mova m11, [pw_16] - mova m12, [pw_11585x2] - - lea r3, [2 * strideq] - -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [inputq + 0] - packssdw m0, [inputq + 16] - mova m1, [inputq + 32] - packssdw m1, [inputq + 48] - mova m2, [inputq + 64] - packssdw m2, [inputq + 80] - mova m3, [inputq + 96] - packssdw m3, [inputq + 112] -%else - mova m0, [inputq + 0] - mova m1, [inputq + 16] - mova m2, [inputq + 32] - mova m3, [inputq + 48] -%endif - - punpcklwd m0, m1 - punpcklwd m2, m3 - punpckhdq m9, m0, m2 - punpckldq m0, m2 - SWAP 2, 9 - - ; m0 -> [0], [0] - ; m1 -> [1], [1] - ; m2 -> [2], [2] - ; m3 -> [3], [3] - punpckhqdq m10, m0, m0 - punpcklqdq m0, m0 - punpckhqdq m9, m2, m2 - punpcklqdq m2, m2 - SWAP 1, 10 - SWAP 3, 9 - - pmulhrsw m0, m12 - pmulhrsw m2, [dpw_30274_12540] - pmulhrsw m1, [dpw_6392_32138] - pmulhrsw m3, [dpw_m18204_27246] - - SUM_SUB 0, 2, 9 - SUM_SUB 1, 3, 9 - - punpcklqdq m9, m3, m3 - punpckhqdq m5, m3, m9 - - SUM_SUB 3, 5, 9 - punpckhqdq m5, m3 - pmulhrsw m5, m12 - - punpckhqdq m9, m1, m5 - punpcklqdq m1, m5 - SWAP 5, 9 - - SUM_SUB 0, 5, 9 - SUM_SUB 2, 1, 9 - - punpckhqdq m3, m0, m0 - punpckhqdq m4, m1, m1 - punpckhqdq m6, m5, m5 - punpckhqdq m7, m2, m2 - - punpcklwd m0, m3 - punpcklwd m7, m2 - punpcklwd m1, m4 - punpcklwd m6, m5 - - punpckhdq m4, m0, m7 - punpckldq m0, m7 - punpckhdq m10, m1, m6 - punpckldq m5, m1, m6 - - punpckhqdq m1, m0, m5 - punpcklqdq m0, m5 - punpckhqdq m3, m4, m10 - punpcklqdq m2, m4, m10 - - - pmulhrsw m0, m12 - pmulhrsw m6, m2, [dpw_30274_30274] - pmulhrsw m4, m2, [dpw_12540_12540] - - pmulhrsw m7, m1, [dpw_32138_32138] - pmulhrsw m1, [dpw_6392_6392] - pmulhrsw m5, m3, [dpw_m18204_m18204] - pmulhrsw m3, [dpw_27246_27246] - - mova m2, m0 - SUM_SUB 0, 6, 9 - SUM_SUB 2, 4, 9 - SUM_SUB 1, 5, 9 - SUM_SUB 7, 3, 9 - - SUM_SUB 3, 5, 9 - pmulhrsw m3, m12 - pmulhrsw m5, m12 - - SUM_SUB 0, 7, 9 - SUM_SUB 2, 3, 9 - SUM_SUB 4, 5, 9 - SUM_SUB 6, 1, 9 - - SWAP 3, 6 - SWAP 1, 2 - SWAP 2, 4 - - - pxor m12, m12 - ADD_STORE_8P_2X 0, 1, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 2, 3, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 4, 5, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 6, 7, 9, 10, 12 - - RET - -%define idx0 16 * 0 -%define idx1 16 * 1 -%define idx2 16 * 2 -%define idx3 16 * 3 -%define idx4 16 * 4 -%define idx5 16 * 5 -%define idx6 16 * 6 -%define idx7 16 * 7 -%define idx8 16 * 0 -%define idx9 16 * 1 -%define idx10 16 * 2 -%define idx11 16 * 3 -%define idx12 16 * 4 -%define idx13 16 * 5 -%define idx14 16 * 6 -%define idx15 16 * 7 -%define idx16 16 * 0 -%define idx17 16 * 1 -%define idx18 16 * 2 -%define idx19 16 * 3 -%define idx20 16 * 4 -%define idx21 16 * 5 -%define idx22 16 * 6 -%define idx23 16 * 7 -%define idx24 16 * 0 -%define idx25 16 * 1 -%define idx26 16 * 2 -%define idx27 16 * 3 -%define idx28 16 * 4 -%define idx29 16 * 5 -%define idx30 16 * 6 -%define idx31 16 * 7 - -; FROM idct32x32_add_neon.asm -; -; Instead of doing the transforms stage by stage, it is done by loading -; some input values and doing as many stages as possible to minimize the -; storing/loading of intermediate results. To fit within registers, the -; final coefficients are cut into four blocks: -; BLOCK A: 16-19,28-31 -; BLOCK B: 20-23,24-27 -; BLOCK C: 8-11,12-15 -; BLOCK D: 0-3,4-7 -; Blocks A and C are straight calculation through the various stages. In -; block B, further calculations are performed using the results from -; block A. In block D, further calculations are performed using the results -; from block C and then the final calculations are done using results from -; block A and B which have been combined at the end of block B. -; - -%macro IDCT32X32_34 4 - ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, m1 - pmulhrsw m1, [pw___804x2] ; stp1_16 - mova [r4 + 0], m0 - pmulhrsw m11, [pw_16364x2] ; stp2_31 - mova [r4 + 16 * 2], m2 - mova m12, m7 - pmulhrsw m7, [pw_15426x2] ; stp1_28 - mova [r4 + 16 * 4], m4 - pmulhrsw m12, [pw_m5520x2] ; stp2_19 - mova [r4 + 16 * 6], m6 - - ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m2, m1 ; stp1_16 - mova m0, m11 ; stp1_31 - mova m4, m7 ; stp1_28 - mova m15, m12 ; stp1_19 - - ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 - BUTTERFLY_4Xmm 4, 15, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 - - ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 - SUM_SUB 0, 15, 9 ; stp2_17, stp2_18 - SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 - SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 - - ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 4, 15, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 - BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 - - ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m6, m5 - pmulhrsw m5, [pw__3981x2] ; stp1_20 - mova [stp + %4 + idx28], m12 - mova [stp + %4 + idx29], m15 - pmulhrsw m6, [pw_15893x2] ; stp2_27 - mova [stp + %4 + idx30], m2 - mova m2, m3 - pmulhrsw m3, [pw_m2404x2] ; stp1_23 - mova [stp + %4 + idx31], m11 - pmulhrsw m2, [pw_16207x2] ; stp2_24 - - ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m13, m5 ; stp1_20 - mova m14, m6 ; stp1_27 - mova m15, m3 ; stp1_23 - mova m11, m2 ; stp1_24 - - ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 - BUTTERFLY_4Xmm 11, 15, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 - - ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 - SUM_SUB 15, 14, 9 ; stp2_22, stp2_21 - SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 - SUM_SUB 11, 13, 9 ; stp2_25, stp2_26 - - ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 - BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 - - ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 3, 9 ; stp2_16, stp2_23 - SUM_SUB 0, 15, 9 ; stp2_17, stp2_22 - SUM_SUB 4, 14, 9 ; stp2_18, stp2_21 - SUM_SUB 7, 5, 9 ; stp2_19, stp2_20 - mova [stp + %3 + idx16], m1 - mova [stp + %3 + idx17], m0 - mova [stp + %3 + idx18], m4 - mova [stp + %3 + idx19], m7 - - mova m4, [stp + %4 + idx28] - mova m7, [stp + %4 + idx29] - mova m10, [stp + %4 + idx30] - mova m12, [stp + %4 + idx31] - SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 - SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 - SUM_SUB 10, 11, 9 ; stp2_30, stp2_25 - SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 - mova [stp + %4 + idx28], m4 - mova [stp + %4 + idx29], m7 - mova [stp + %4 + idx30], m10 - mova [stp + %4 + idx31], m12 - - ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 6, 5, 9 - pmulhrsw m6, m10 ; stp1_27 - pmulhrsw m5, m10 ; stp1_20 - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_26 - pmulhrsw m14, m10 ; stp1_21 - SUM_SUB 11, 15, 9 - pmulhrsw m11, m10 ; stp1_25 - pmulhrsw m15, m10 ; stp1_22 - SUM_SUB 2, 3, 9 - pmulhrsw m2, m10 ; stp1_24 - pmulhrsw m3, m10 ; stp1_23 -%else - BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 - SWAP 6, 5 - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 - SWAP 13, 14 - BUTTERFLY_4X 11, 15, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 - SWAP 11, 15 - BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 - SWAP 2, 3 -%endif - - mova [stp + %4 + idx24], m2 - mova [stp + %4 + idx25], m11 - mova [stp + %4 + idx26], m13 - mova [stp + %4 + idx27], m6 - - ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 2] - mova m6, [rsp + transposed_in + 16 * 6] - - mova m1, m0 - pmulhrsw m0, [pw__1606x2] ; stp1_8 - mova [stp + %3 + idx20], m5 - mova [stp + %3 + idx21], m14 - pmulhrsw m1, [pw_16305x2] ; stp2_15 - mova [stp + %3 + idx22], m15 - mova m7, m6 - pmulhrsw m7, [pw_m4756x2] ; stp2_11 - mova [stp + %3 + idx23], m3 - pmulhrsw m6, [pw_15679x2] ; stp1_12 - - ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m3, m0 ; stp1_8 - mova m2, m1 ; stp1_15 - - ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 - mova m4, m7 ; stp1_11 - mova m5, m6 ; stp1_12 - BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 - - ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 - SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 - SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 - SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 - - ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 5, 4, 9 - pmulhrsw m5, m10 ; stp1_13 - pmulhrsw m4, m10 ; stp1_10 - SUM_SUB 6, 7, 9 - pmulhrsw m6, m10 ; stp1_12 - pmulhrsw m7, m10 ; stp1_11 -%else - BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 - SWAP 5, 4 - BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 - SWAP 6, 7 -%endif - - ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova [stp + %2 + idx8], m0 - mova [stp + %2 + idx9], m2 - mova [stp + %2 + idx10], m4 - mova [stp + %2 + idx11], m7 - - ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, [rsp + transposed_in + 16 * 4] - mova m12, m11 - pmulhrsw m11, [pw__3196x2] ; stp1_4 - pmulhrsw m12, [pw_16069x2] ; stp1_7 - - ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 0] - mova m10, [pw_11585x2] - pmulhrsw m0, m10 ; stp1_1 - - mova m14, m11 ; stp1_4 - mova m13, m12 ; stp1_7 - - ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_6 - pmulhrsw m14, m10 ; stp1_5 -%else - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 - SWAP 13, 14 -%endif - mova m7, m0 ; stp1_0 = stp1_1 - mova m4, m0 ; stp1_1 - mova m2, m7 ; stp1_0 - - ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 - SUM_SUB 7, 13, 9 ; stp1_1, stp1_6 - SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 - SUM_SUB 4, 11, 9 ; stp1_3, stp1_4 - - ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 1, 9 ; stp1_0, stp1_15 - SUM_SUB 7, 3, 9 ; stp1_1, stp1_14 - SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 - SUM_SUB 4, 6, 9 ; stp1_3, stp1_12 - - ; 0-3, 28-31 final stage - mova m15, [stp + %4 + idx30] - mova m10, [stp + %4 + idx31] - SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 - SUM_SUB 7, 15, 9 ; stp1_1, stp1_30 - mova [stp + %1 + idx0], m0 - mova [stp + %1 + idx1], m7 - mova [stp + %4 + idx30], m15 - mova [stp + %4 + idx31], m10 - mova m7, [stp + %4 + idx28] - mova m0, [stp + %4 + idx29] - SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 - SUM_SUB 4, 7, 9 ; stp1_3, stp1_28 - mova [stp + %1 + idx2], m2 - mova [stp + %1 + idx3], m4 - mova [stp + %4 + idx28], m7 - mova [stp + %4 + idx29], m0 - - ; 12-15, 16-19 final stage - mova m0, [stp + %3 + idx16] - mova m7, [stp + %3 + idx17] - mova m2, [stp + %3 + idx18] - mova m4, [stp + %3 + idx19] - SUM_SUB 1, 0, 9 ; stp1_15, stp1_16 - SUM_SUB 3, 7, 9 ; stp1_14, stp1_17 - SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 - SUM_SUB 6, 4, 9 ; stp1_12, stp1_19 - mova [stp + %2 + idx12], m6 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m3 - mova [stp + %2 + idx15], m1 - mova [stp + %3 + idx16], m0 - mova [stp + %3 + idx17], m7 - mova [stp + %3 + idx18], m2 - mova [stp + %3 + idx19], m4 - - mova m4, [stp + %2 + idx8] - mova m5, [stp + %2 + idx9] - mova m6, [stp + %2 + idx10] - mova m7, [stp + %2 + idx11] - SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 - SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 - SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 - SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 - - ; 4-7, 24-27 final stage - mova m0, [stp + %4 + idx27] - mova m1, [stp + %4 + idx26] - mova m2, [stp + %4 + idx25] - mova m3, [stp + %4 + idx24] - SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 - SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 - SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 - SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 - mova [stp + %4 + idx27], m0 - mova [stp + %4 + idx26], m1 - mova [stp + %4 + idx25], m2 - mova [stp + %4 + idx24], m3 - mova [stp + %1 + idx4], m11 - mova [stp + %1 + idx5], m14 - mova [stp + %1 + idx6], m13 - mova [stp + %1 + idx7], m12 - - ; 8-11, 20-23 final stage - mova m0, [stp + %3 + idx20] - mova m1, [stp + %3 + idx21] - mova m2, [stp + %3 + idx22] - mova m3, [stp + %3 + idx23] - SUM_SUB 7, 0, 9 ; stp1_11, stp_20 - SUM_SUB 6, 1, 9 ; stp1_10, stp_21 - SUM_SUB 5, 2, 9 ; stp1_9, stp_22 - SUM_SUB 4, 3, 9 ; stp1_8, stp_23 - mova [stp + %2 + idx8], m4 - mova [stp + %2 + idx9], m5 - mova [stp + %2 + idx10], m6 - mova [stp + %2 + idx11], m7 - mova [stp + %3 + idx20], m0 - mova [stp + %3 + idx21], m1 - mova [stp + %3 + idx22], m2 - mova [stp + %3 + idx23], m3 -%endmacro - -%macro RECON_AND_STORE 1 - mova m11, [pw_32] - lea stp, [rsp + %1] - mov r6, 32 - pxor m8, m8 -%%recon_and_store: - mova m0, [stp + 16 * 32 * 0] - mova m1, [stp + 16 * 32 * 1] - mova m2, [stp + 16 * 32 * 2] - mova m3, [stp + 16 * 32 * 3] - add stp, 16 - - paddw m0, m11 - paddw m1, m11 - paddw m2, m11 - paddw m3, m11 - psraw m0, 6 - psraw m1, 6 - psraw m2, 6 - psraw m3, 6 - movh m4, [outputq + 0] - movh m5, [outputq + 8] - movh m6, [outputq + 16] - movh m7, [outputq + 24] - punpcklbw m4, m8 - punpcklbw m5, m8 - punpcklbw m6, m8 - punpcklbw m7, m8 - paddw m0, m4 - paddw m1, m5 - paddw m2, m6 - paddw m3, m7 - packuswb m0, m1 - packuswb m2, m3 - mova [outputq + 0], m0 - mova [outputq + 16], m2 - lea outputq, [outputq + strideq] - dec r6 - jnz %%recon_and_store -%endmacro - -%define i32x32_size 16*32*5 -%define pass_two_start 16*32*0 -%define transposed_in 16*32*4 -%define pass_one_start 16*32*0 -%define stp r8 - -INIT_XMM ssse3 -cglobal idct32x32_34_add, 3, 11, 16, i32x32_size, input, output, stride - mova m8, [pd_8192] - lea stp, [rsp + pass_one_start] - -idct32x32_34: - mov r3, inputq - lea r4, [rsp + transposed_in] - -idct32x32_34_transpose: -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [r3 + 0] - packssdw m0, [r3 + 16] - mova m1, [r3 + 32 * 4] - packssdw m1, [r3 + 32 * 4 + 16] - mova m2, [r3 + 32 * 8] - packssdw m2, [r3 + 32 * 8 + 16] - mova m3, [r3 + 32 * 12] - packssdw m3, [r3 + 32 * 12 + 16] - mova m4, [r3 + 32 * 16] - packssdw m4, [r3 + 32 * 16 + 16] - mova m5, [r3 + 32 * 20] - packssdw m5, [r3 + 32 * 20 + 16] - mova m6, [r3 + 32 * 24] - packssdw m6, [r3 + 32 * 24 + 16] - mova m7, [r3 + 32 * 28] - packssdw m7, [r3 + 32 * 28 + 16] -%else - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 4] - mova m2, [r3 + 16 * 8] - mova m3, [r3 + 16 * 12] - mova m4, [r3 + 16 * 16] - mova m5, [r3 + 16 * 20] - mova m6, [r3 + 16 * 24] - mova m7, [r3 + 16 * 28] -%endif - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - IDCT32X32_34 16*0, 16*32, 16*64, 16*96 - lea stp, [stp + 16 * 8] - mov r6, 4 - lea stp, [rsp + pass_one_start] - lea r9, [rsp + pass_one_start] - -idct32x32_34_2: - lea r4, [rsp + transposed_in] - mov r3, r9 - -idct32x32_34_transpose_2: - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 1] - mova m2, [r3 + 16 * 2] - mova m3, [r3 + 16 * 3] - mova m4, [r3 + 16 * 4] - mova m5, [r3 + 16 * 5] - mova m6, [r3 + 16 * 6] - mova m7, [r3 + 16 * 7] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - IDCT32X32_34 16*0, 16*8, 16*16, 16*24 - - lea stp, [stp + 16 * 32] - add r9, 16 * 32 - dec r6 - jnz idct32x32_34_2 - - RECON_AND_STORE pass_two_start - - RET - -%macro IDCT32X32_135 4 - ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m1, [rsp + transposed_in + 16 * 1] - mova m11, m1 - pmulhrsw m1, [pw___804x2] ; stp1_16 - pmulhrsw m11, [pw_16364x2] ; stp2_31 - - mova m7, [rsp + transposed_in + 16 * 7] - mova m12, m7 - pmulhrsw m7, [pw_15426x2] ; stp1_28 - pmulhrsw m12, [pw_m5520x2] ; stp2_19 - - mova m3, [rsp + transposed_in + 16 * 9] - mova m4, m3 - pmulhrsw m3, [pw__7005x2] ; stp1_18 - pmulhrsw m4, [pw_14811x2] ; stp2_29 - - mova m0, [rsp + transposed_in + 16 * 15] - mova m2, m0 - pmulhrsw m0, [pw_12140x2] ; stp1_30 - pmulhrsw m2, [pw_m11003x2] ; stp2_17 - - ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 - SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 - SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 - SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 - - ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 - BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 - - ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 - SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 - SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 - SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 - - ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 - BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 - - mova [stp + %3 + idx16], m1 - mova [stp + %3 + idx17], m0 - mova [stp + %3 + idx18], m4 - mova [stp + %3 + idx19], m7 - mova [stp + %4 + idx28], m12 - mova [stp + %4 + idx29], m3 - mova [stp + %4 + idx30], m2 - mova [stp + %4 + idx31], m11 - - ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m2, [rsp + transposed_in + 16 * 3] - mova m3, m2 - pmulhrsw m3, [pw_m2404x2] ; stp1_23 - pmulhrsw m2, [pw_16207x2] ; stp2_24 - - mova m5, [rsp + transposed_in + 16 * 5] - mova m6, m5 - pmulhrsw m5, [pw__3981x2] ; stp1_20 - pmulhrsw m6, [pw_15893x2] ; stp2_27 - - mova m14, [rsp + transposed_in + 16 * 11] - mova m13, m14 - pmulhrsw m13, [pw_m8423x2] ; stp1_21 - pmulhrsw m14, [pw_14053x2] ; stp2_26 - - mova m0, [rsp + transposed_in + 16 * 13] - mova m1, m0 - pmulhrsw m0, [pw__9760x2] ; stp1_22 - pmulhrsw m1, [pw_13160x2] ; stp2_25 - - ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 - SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 - SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 - SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 - - ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 - BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 - - ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 - SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 - SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 - SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 - - ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 - BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 - - ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %3 + idx16] - mova m7, [stp + %3 + idx17] - mova m11, [stp + %3 + idx18] - mova m12, [stp + %3 + idx19] - SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 - SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 - SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 - SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 - mova [stp + %3 + idx16], m4 - mova [stp + %3 + idx17], m7 - mova [stp + %3 + idx18], m11 - mova [stp + %3 + idx19], m12 - - mova m4, [stp + %4 + idx28] - mova m7, [stp + %4 + idx29] - mova m11, [stp + %4 + idx30] - mova m12, [stp + %4 + idx31] - SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 - SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 - SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 - SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 - mova [stp + %4 + idx28], m4 - mova [stp + %4 + idx29], m7 - mova [stp + %4 + idx30], m11 - mova [stp + %4 + idx31], m12 - - ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 6, 5, 9 - pmulhrsw m6, m10 ; stp1_27 - pmulhrsw m5, m10 ; stp1_20 - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_26 - pmulhrsw m14, m10 ; stp1_21 - SUM_SUB 1, 0, 9 - pmulhrsw m1, m10 ; stp1_25 - pmulhrsw m0, m10 ; stp1_22 - SUM_SUB 2, 3, 9 - pmulhrsw m2, m10 ; stp1_25 - pmulhrsw m3, m10 ; stp1_22 -%else - BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 - SWAP 6, 5 - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 - SWAP 13, 14 - BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 - SWAP 1, 0 - BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 - SWAP 2, 3 -%endif - mova [stp + %3 + idx20], m5 - mova [stp + %3 + idx21], m14 - mova [stp + %3 + idx22], m0 - mova [stp + %3 + idx23], m3 - mova [stp + %4 + idx24], m2 - mova [stp + %4 + idx25], m1 - mova [stp + %4 + idx26], m13 - mova [stp + %4 + idx27], m6 - - ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 2] - mova m1, m0 - pmulhrsw m0, [pw__1606x2] ; stp1_8 - pmulhrsw m1, [pw_16305x2] ; stp2_15 - - mova m6, [rsp + transposed_in + 16 * 6] - mova m7, m6 - pmulhrsw m7, [pw_m4756x2] ; stp2_11 - pmulhrsw m6, [pw_15679x2] ; stp1_12 - - mova m4, [rsp + transposed_in + 16 * 10] - mova m5, m4 - pmulhrsw m4, [pw__7723x2] ; stp1_10 - pmulhrsw m5, [pw_14449x2] ; stp2_13 - - mova m2, [rsp + transposed_in + 16 * 14] - mova m3, m2 - pmulhrsw m3, [pw_m10394x2] ; stp1_9 - pmulhrsw m2, [pw_12665x2] ; stp2_14 - - ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 - SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 - SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 - SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 - - ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 - BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 - - ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 - SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 - SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 - SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 - - ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 5, 4, 9 - pmulhrsw m5, m10 ; stp1_13 - pmulhrsw m4, m10 ; stp1_10 - SUM_SUB 6, 7, 9 - pmulhrsw m6, m10 ; stp1_12 - pmulhrsw m7, m10 ; stp1_11 -%else - BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 - SWAP 5, 4 - BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 - SWAP 6, 7 -%endif - ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova [stp + %2 + idx8], m0 - mova [stp + %2 + idx9], m2 - mova [stp + %2 + idx10], m4 - mova [stp + %2 + idx11], m7 - mova [stp + %2 + idx12], m6 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m3 - mova [stp + %2 + idx15], m1 - - ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, [rsp + transposed_in + 16 * 4] - mova m12, m11 - pmulhrsw m11, [pw__3196x2] ; stp1_4 - pmulhrsw m12, [pw_16069x2] ; stp1_7 - - mova m13, [rsp + transposed_in + 16 * 12] - mova m14, m13 - pmulhrsw m13, [pw_13623x2] ; stp1_6 - pmulhrsw m14, [pw_m9102x2] ; stp1_5 - - ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 0] - mova m2, [rsp + transposed_in + 16 * 8] - pmulhrsw m0, [pw_11585x2] ; stp1_1 - mova m3, m2 - pmulhrsw m2, [pw__6270x2] ; stp1_2 - pmulhrsw m3, [pw_15137x2] ; stp1_3 - - SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 - SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 - - ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_6 - pmulhrsw m14, m10 ; stp1_5 -%else - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 - SWAP 13, 14 -%endif - mova m1, m0 ; stp1_0 = stp1_1 - SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 - SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 - - ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 - SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 - SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 - SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 - - ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %2 + idx12] - mova m5, [stp + %2 + idx13] - mova m6, [stp + %2 + idx14] - mova m7, [stp + %2 + idx15] - SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 - SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 - SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 - SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 - - ; 0-3, 28-31 final stage - mova m10, [stp + %4 + idx31] - mova m15, [stp + %4 + idx30] - SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 - SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 - mova [stp + %1 + idx0], m0 - mova [stp + %1 + idx1], m1 - mova [stp + %4 + idx31], m10 - mova [stp + %4 + idx30], m15 - mova m0, [stp + %4 + idx29] - mova m1, [stp + %4 + idx28] - SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 - SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 - mova [stp + %1 + idx2], m2 - mova [stp + %1 + idx3], m3 - mova [stp + %4 + idx29], m0 - mova [stp + %4 + idx28], m1 - - ; 12-15, 16-19 final stage - mova m0, [stp + %3 + idx16] - mova m1, [stp + %3 + idx17] - mova m2, [stp + %3 + idx18] - mova m3, [stp + %3 + idx19] - SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 - SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 - SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 - SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 - mova [stp + %2 + idx12], m4 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m6 - mova [stp + %2 + idx15], m7 - mova [stp + %3 + idx16], m0 - mova [stp + %3 + idx17], m1 - mova [stp + %3 + idx18], m2 - mova [stp + %3 + idx19], m3 - - mova m4, [stp + %2 + idx8] - mova m5, [stp + %2 + idx9] - mova m6, [stp + %2 + idx10] - mova m7, [stp + %2 + idx11] - SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 - SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 - SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 - SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 - - ; 4-7, 24-27 final stage - mova m3, [stp + %4 + idx24] - mova m2, [stp + %4 + idx25] - mova m1, [stp + %4 + idx26] - mova m0, [stp + %4 + idx27] - SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 - SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 - SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 - SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 - mova [stp + %4 + idx24], m3 - mova [stp + %4 + idx25], m2 - mova [stp + %4 + idx26], m1 - mova [stp + %4 + idx27], m0 - mova [stp + %1 + idx4], m11 - mova [stp + %1 + idx5], m14 - mova [stp + %1 + idx6], m13 - mova [stp + %1 + idx7], m12 - - ; 8-11, 20-23 final stage - mova m0, [stp + %3 + idx20] - mova m1, [stp + %3 + idx21] - mova m2, [stp + %3 + idx22] - mova m3, [stp + %3 + idx23] - SUM_SUB 7, 0, 9 ; stp1_11, stp_20 - SUM_SUB 6, 1, 9 ; stp1_10, stp_21 - SUM_SUB 5, 2, 9 ; stp1_9, stp_22 - SUM_SUB 4, 3, 9 ; stp1_8, stp_23 - mova [stp + %2 + idx8], m4 - mova [stp + %2 + idx9], m5 - mova [stp + %2 + idx10], m6 - mova [stp + %2 + idx11], m7 - mova [stp + %3 + idx20], m0 - mova [stp + %3 + idx21], m1 - mova [stp + %3 + idx22], m2 - mova [stp + %3 + idx23], m3 -%endmacro - -INIT_XMM ssse3 -cglobal idct32x32_135_add, 3, 11, 16, i32x32_size, input, output, stride - mova m8, [pd_8192] - mov r6, 2 - lea stp, [rsp + pass_one_start] - -idct32x32_135: - mov r3, inputq - lea r4, [rsp + transposed_in] - mov r7, 2 - -idct32x32_135_transpose: -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [r3 + 0] - packssdw m0, [r3 + 16] - mova m1, [r3 + 32 * 4] - packssdw m1, [r3 + 32 * 4 + 16] - mova m2, [r3 + 32 * 8] - packssdw m2, [r3 + 32 * 8 + 16] - mova m3, [r3 + 32 * 12] - packssdw m3, [r3 + 32 * 12 + 16] - mova m4, [r3 + 32 * 16] - packssdw m4, [r3 + 32 * 16 + 16] - mova m5, [r3 + 32 * 20] - packssdw m5, [r3 + 32 * 20 + 16] - mova m6, [r3 + 32 * 24] - packssdw m6, [r3 + 32 * 24 + 16] - mova m7, [r3 + 32 * 28] - packssdw m7, [r3 + 32 * 28 + 16] -%else - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 4] - mova m2, [r3 + 16 * 8] - mova m3, [r3 + 16 * 12] - mova m4, [r3 + 16 * 16] - mova m5, [r3 + 16 * 20] - mova m6, [r3 + 16 * 24] - mova m7, [r3 + 16 * 28] -%endif - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 - -%if CONFIG_VP9_HIGHBITDEPTH - add r3, 32 -%else - add r3, 16 -%endif - add r4, 16 * 8 - dec r7 - jne idct32x32_135_transpose - - IDCT32X32_135 16*0, 16*32, 16*64, 16*96 - lea stp, [stp + 16 * 8] -%if CONFIG_VP9_HIGHBITDEPTH - lea inputq, [inputq + 32 * 32] -%else - lea inputq, [inputq + 16 * 32] -%endif - dec r6 - jnz idct32x32_135 - - mov r6, 4 - lea stp, [rsp + pass_one_start] - lea r9, [rsp + pass_one_start] - -idct32x32_135_2: - lea r4, [rsp + transposed_in] - mov r3, r9 - mov r7, 2 - -idct32x32_135_transpose_2: - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 1] - mova m2, [r3 + 16 * 2] - mova m3, [r3 + 16 * 3] - mova m4, [r3 + 16 * 4] - mova m5, [r3 + 16 * 5] - mova m6, [r3 + 16 * 6] - mova m7, [r3 + 16 * 7] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 - - add r3, 16 * 8 - add r4, 16 * 8 - dec r7 - jne idct32x32_135_transpose_2 - - IDCT32X32_135 16*0, 16*8, 16*16, 16*24 - - lea stp, [stp + 16 * 32] - add r9, 16 * 32 - dec r6 - jnz idct32x32_135_2 - - RECON_AND_STORE pass_two_start - - RET - -%macro IDCT32X32_1024 4 - ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m1, [rsp + transposed_in + 16 * 1] - mova m11, [rsp + transposed_in + 16 * 31] - BUTTERFLY_4X 1, 11, 804, 16364, m8, 9, 10 ; stp1_16, stp1_31 - - mova m0, [rsp + transposed_in + 16 * 15] - mova m2, [rsp + transposed_in + 16 * 17] - BUTTERFLY_4X 2, 0, 12140, 11003, m8, 9, 10 ; stp1_17, stp1_30 - - mova m7, [rsp + transposed_in + 16 * 7] - mova m12, [rsp + transposed_in + 16 * 25] - BUTTERFLY_4X 12, 7, 15426, 5520, m8, 9, 10 ; stp1_19, stp1_28 - - mova m3, [rsp + transposed_in + 16 * 9] - mova m4, [rsp + transposed_in + 16 * 23] - BUTTERFLY_4X 3, 4, 7005, 14811, m8, 9, 10 ; stp1_18, stp1_29 - - ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 - SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 - SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 - SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 - - ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 - BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 - - ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 - SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 - SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 - SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 - - ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 - BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 - - mova [stp + %3 + idx16], m1 - mova [stp + %3 + idx17], m0 - mova [stp + %3 + idx18], m4 - mova [stp + %3 + idx19], m7 - mova [stp + %4 + idx28], m12 - mova [stp + %4 + idx29], m3 - mova [stp + %4 + idx30], m2 - mova [stp + %4 + idx31], m11 - - ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m5, [rsp + transposed_in + 16 * 5] - mova m6, [rsp + transposed_in + 16 * 27] - BUTTERFLY_4X 5, 6, 3981, 15893, m8, 9, 10 ; stp1_20, stp1_27 - - mova m13, [rsp + transposed_in + 16 * 21] - mova m14, [rsp + transposed_in + 16 * 11] - BUTTERFLY_4X 13, 14, 14053, 8423, m8, 9, 10 ; stp1_21, stp1_26 - - mova m0, [rsp + transposed_in + 16 * 13] - mova m1, [rsp + transposed_in + 16 * 19] - BUTTERFLY_4X 0, 1, 9760, 13160, m8, 9, 10 ; stp1_22, stp1_25 - - mova m2, [rsp + transposed_in + 16 * 3] - mova m3, [rsp + transposed_in + 16 * 29] - BUTTERFLY_4X 3, 2, 16207, 2404, m8, 9, 10 ; stp1_23, stp1_24 - - ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 - SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 - SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 - SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 - - ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 - BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 - - ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 - SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 - SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 - SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 - - ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 - BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 - - ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %3 + idx16] - mova m7, [stp + %3 + idx17] - mova m11, [stp + %3 + idx18] - mova m12, [stp + %3 + idx19] - SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 - SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 - SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 - SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 - mova [stp + %3 + idx16], m4 - mova [stp + %3 + idx17], m7 - mova [stp + %3 + idx18], m11 - mova [stp + %3 + idx19], m12 - - mova m4, [stp + %4 + idx28] - mova m7, [stp + %4 + idx29] - mova m11, [stp + %4 + idx30] - mova m12, [stp + %4 + idx31] - SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 - SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 - SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 - SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 - mova [stp + %4 + idx28], m4 - mova [stp + %4 + idx29], m7 - mova [stp + %4 + idx30], m11 - mova [stp + %4 + idx31], m12 - - ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 6, 5, 9 - pmulhrsw m6, m10 ; stp1_27 - pmulhrsw m5, m10 ; stp1_20 - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_26 - pmulhrsw m14, m10 ; stp1_21 - SUM_SUB 1, 0, 9 - pmulhrsw m1, m10 ; stp1_25 - pmulhrsw m0, m10 ; stp1_22 - SUM_SUB 2, 3, 9 - pmulhrsw m2, m10 ; stp1_25 - pmulhrsw m3, m10 ; stp1_22 -%else - BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 - SWAP 6, 5 - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 - SWAP 13, 14 - BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 - SWAP 1, 0 - BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 - SWAP 2, 3 -%endif - mova [stp + %3 + idx20], m5 - mova [stp + %3 + idx21], m14 - mova [stp + %3 + idx22], m0 - mova [stp + %3 + idx23], m3 - mova [stp + %4 + idx24], m2 - mova [stp + %4 + idx25], m1 - mova [stp + %4 + idx26], m13 - mova [stp + %4 + idx27], m6 - - ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 2] - mova m1, [rsp + transposed_in + 16 * 30] - BUTTERFLY_4X 0, 1, 1606, 16305, m8, 9, 10 ; stp1_8, stp1_15 - - mova m2, [rsp + transposed_in + 16 * 14] - mova m3, [rsp + transposed_in + 16 * 18] - BUTTERFLY_4X 3, 2, 12665, 10394, m8, 9, 10 ; stp1_9, stp1_14 - - mova m4, [rsp + transposed_in + 16 * 10] - mova m5, [rsp + transposed_in + 16 * 22] - BUTTERFLY_4X 4, 5, 7723, 14449, m8, 9, 10 ; stp1_10, stp1_13 - - mova m6, [rsp + transposed_in + 16 * 6] - mova m7, [rsp + transposed_in + 16 * 26] - BUTTERFLY_4X 7, 6, 15679, 4756, m8, 9, 10 ; stp1_11, stp1_12 - - ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 - SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 - SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 - SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 - - ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 - BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 - - ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 - SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 - SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 - SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 - - ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 5, 4, 9 - pmulhrsw m5, m10 ; stp1_13 - pmulhrsw m4, m10 ; stp1_10 - SUM_SUB 6, 7, 9 - pmulhrsw m6, m10 ; stp1_12 - pmulhrsw m7, m10 ; stp1_11 -%else - BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 - SWAP 5, 4 - BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 - SWAP 6, 7 -%endif - ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova [stp + %2 + idx8], m0 - mova [stp + %2 + idx9], m2 - mova [stp + %2 + idx10], m4 - mova [stp + %2 + idx11], m7 - mova [stp + %2 + idx12], m6 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m3 - mova [stp + %2 + idx15], m1 - - ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, [rsp + transposed_in + 16 * 4] - mova m12, [rsp + transposed_in + 16 * 28] - BUTTERFLY_4X 11, 12, 3196, 16069, m8, 9, 10 ; stp1_4, stp1_7 - - mova m13, [rsp + transposed_in + 16 * 12] - mova m14, [rsp + transposed_in + 16 * 20] - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_5, stp1_6 - - ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 0] - mova m1, [rsp + transposed_in + 16 * 16] - -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 0, 1, 9 - pmulhrsw m0, m10 ; stp1_1 - pmulhrsw m1, m10 ; stp1_0 -%else - BUTTERFLY_4X 0, 1, 11585, 11585, m8, 9, 10 ; stp1_1, stp1_0 - SWAP 0, 1 -%endif - mova m2, [rsp + transposed_in + 16 * 8] - mova m3, [rsp + transposed_in + 16 * 24] - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_2, stp1_3 - - mova m10, [pw_11585x2] - SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 - SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 - - ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_6 - pmulhrsw m14, m10 ; stp1_5 -%else - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 - SWAP 13, 14 -%endif - SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 - SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 - - ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 - SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 - SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 - SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 - - ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %2 + idx12] - mova m5, [stp + %2 + idx13] - mova m6, [stp + %2 + idx14] - mova m7, [stp + %2 + idx15] - SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 - SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 - SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 - SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 - - ; 0-3, 28-31 final stage - mova m10, [stp + %4 + idx31] - mova m15, [stp + %4 + idx30] - SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 - SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 - mova [stp + %1 + idx0], m0 - mova [stp + %1 + idx1], m1 - mova [stp + %4 + idx31], m10 - mova [stp + %4 + idx30], m15 - mova m0, [stp + %4 + idx29] - mova m1, [stp + %4 + idx28] - SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 - SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 - mova [stp + %1 + idx2], m2 - mova [stp + %1 + idx3], m3 - mova [stp + %4 + idx29], m0 - mova [stp + %4 + idx28], m1 - - ; 12-15, 16-19 final stage - mova m0, [stp + %3 + idx16] - mova m1, [stp + %3 + idx17] - mova m2, [stp + %3 + idx18] - mova m3, [stp + %3 + idx19] - SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 - SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 - SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 - SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 - mova [stp + %2 + idx12], m4 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m6 - mova [stp + %2 + idx15], m7 - mova [stp + %3 + idx16], m0 - mova [stp + %3 + idx17], m1 - mova [stp + %3 + idx18], m2 - mova [stp + %3 + idx19], m3 - - mova m4, [stp + %2 + idx8] - mova m5, [stp + %2 + idx9] - mova m6, [stp + %2 + idx10] - mova m7, [stp + %2 + idx11] - SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 - SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 - SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 - SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 - - ; 4-7, 24-27 final stage - mova m3, [stp + %4 + idx24] - mova m2, [stp + %4 + idx25] - mova m1, [stp + %4 + idx26] - mova m0, [stp + %4 + idx27] - SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 - SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 - SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 - SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 - mova [stp + %4 + idx24], m3 - mova [stp + %4 + idx25], m2 - mova [stp + %4 + idx26], m1 - mova [stp + %4 + idx27], m0 - mova [stp + %1 + idx4], m11 - mova [stp + %1 + idx5], m14 - mova [stp + %1 + idx6], m13 - mova [stp + %1 + idx7], m12 - - ; 8-11, 20-23 final stage - mova m0, [stp + %3 + idx20] - mova m1, [stp + %3 + idx21] - mova m2, [stp + %3 + idx22] - mova m3, [stp + %3 + idx23] - SUM_SUB 7, 0, 9 ; stp1_11, stp_20 - SUM_SUB 6, 1, 9 ; stp1_10, stp_21 - SUM_SUB 5, 2, 9 ; stp1_9, stp_22 - SUM_SUB 4, 3, 9 ; stp1_8, stp_23 - mova [stp + %2 + idx8], m4 - mova [stp + %2 + idx9], m5 - mova [stp + %2 + idx10], m6 - mova [stp + %2 + idx11], m7 - mova [stp + %3 + idx20], m0 - mova [stp + %3 + idx21], m1 - mova [stp + %3 + idx22], m2 - mova [stp + %3 + idx23], m3 -%endmacro - -INIT_XMM ssse3 -cglobal idct32x32_1024_add, 3, 11, 16, i32x32_size, input, output, stride - mova m8, [pd_8192] - mov r6, 4 - lea stp, [rsp + pass_one_start] - -idct32x32_1024: - mov r3, inputq - lea r4, [rsp + transposed_in] - mov r7, 4 - -idct32x32_1024_transpose: -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [r3 + 0] - packssdw m0, [r3 + 16] - mova m1, [r3 + 32 * 4] - packssdw m1, [r3 + 32 * 4 + 16] - mova m2, [r3 + 32 * 8] - packssdw m2, [r3 + 32 * 8 + 16] - mova m3, [r3 + 32 * 12] - packssdw m3, [r3 + 32 * 12 + 16] - mova m4, [r3 + 32 * 16] - packssdw m4, [r3 + 32 * 16 + 16] - mova m5, [r3 + 32 * 20] - packssdw m5, [r3 + 32 * 20 + 16] - mova m6, [r3 + 32 * 24] - packssdw m6, [r3 + 32 * 24 + 16] - mova m7, [r3 + 32 * 28] - packssdw m7, [r3 + 32 * 28 + 16] -%else - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 4] - mova m2, [r3 + 16 * 8] - mova m3, [r3 + 16 * 12] - mova m4, [r3 + 16 * 16] - mova m5, [r3 + 16 * 20] - mova m6, [r3 + 16 * 24] - mova m7, [r3 + 16 * 28] -%endif - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 -%if CONFIG_VP9_HIGHBITDEPTH - add r3, 32 -%else - add r3, 16 -%endif - add r4, 16 * 8 - dec r7 - jne idct32x32_1024_transpose - - IDCT32X32_1024 16*0, 16*32, 16*64, 16*96 - - lea stp, [stp + 16 * 8] -%if CONFIG_VP9_HIGHBITDEPTH - lea inputq, [inputq + 32 * 32] -%else - lea inputq, [inputq + 16 * 32] -%endif - dec r6 - jnz idct32x32_1024 - - mov r6, 4 - lea stp, [rsp + pass_one_start] - lea r9, [rsp + pass_one_start] - -idct32x32_1024_2: - lea r4, [rsp + transposed_in] - mov r3, r9 - mov r7, 4 - -idct32x32_1024_transpose_2: - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 1] - mova m2, [r3 + 16 * 2] - mova m3, [r3 + 16 * 3] - mova m4, [r3 + 16 * 4] - mova m5, [r3 + 16 * 5] - mova m6, [r3 + 16 * 6] - mova m7, [r3 + 16 * 7] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 - - add r3, 16 * 8 - add r4, 16 * 8 - dec r7 - jne idct32x32_1024_transpose_2 - - IDCT32X32_1024 16*0, 16*8, 16*16, 16*24 - - lea stp, [stp + 16 * 32] - add r9, 16 * 32 - dec r6 - jnz idct32x32_1024_2 - - RECON_AND_STORE pass_two_start - - RET -%endif diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm deleted file mode 100644 index fbbcd76bd7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm +++ /dev/null @@ -1,109 +0,0 @@ -; -; Copyright (c) 2015 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro REORDER_INPUTS 0 - ; a c d b to a b c d - SWAP 1, 3, 2 -%endmacro - -%macro TRANSFORM_COLS 0 - ; input: - ; m0 a - ; m1 b - ; m2 c - ; m3 d - paddw m0, m2 - psubw m3, m1 - - ; wide subtract - punpcklwd m4, m0 - punpcklwd m5, m3 - psrad m4, 16 - psrad m5, 16 - psubd m4, m5 - psrad m4, 1 - packssdw m4, m4 ; e - - psubw m5, m4, m1 ; b - psubw m4, m2 ; c - psubw m0, m5 - paddw m3, m4 - ; m0 a - SWAP 1, 5 ; m1 b - SWAP 2, 4 ; m2 c - ; m3 d -%endmacro - -%macro TRANSPOSE_4X4 0 - punpcklwd m0, m2 - punpcklwd m1, m3 - mova m2, m0 - punpcklwd m0, m1 - punpckhwd m2, m1 - pshufd m1, m0, 0x0e - pshufd m3, m2, 0x0e -%endmacro - -; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3 -%macro TRANSPOSE_4X4_WIDE 0 - mova m3, m0 - punpcklwd m0, m1 - punpckhwd m3, m1 - mova m2, m0 - punpcklwd m0, m3 - punpckhwd m2, m3 - pshufd m1, m0, 0x0e - pshufd m3, m2, 0x0e -%endmacro - -%macro ADD_STORE_4P_2X 5 ; src1, src2, tmp1, tmp2, zero - movd m%3, [outputq] - movd m%4, [outputq + strideq] - punpcklbw m%3, m%5 - punpcklbw m%4, m%5 - paddw m%1, m%3 - paddw m%2, m%4 - packuswb m%1, m%5 - packuswb m%2, m%5 - movd [outputq], m%1 - movd [outputq + strideq], m%2 -%endmacro - -INIT_XMM sse2 -cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [inputq + 0] - packssdw m0, [inputq + 16] - mova m1, [inputq + 32] - packssdw m1, [inputq + 48] -%else - mova m0, [inputq + 0] - mova m1, [inputq + 16] -%endif - psraw m0, 2 - psraw m1, 2 - - TRANSPOSE_4X4_WIDE - REORDER_INPUTS - TRANSFORM_COLS - TRANSPOSE_4X4 - REORDER_INPUTS - TRANSFORM_COLS - - pxor m4, m4 - ADD_STORE_4P_2X 0, 1, 5, 6, 4 - lea outputq, [outputq + 2 * strideq] - ADD_STORE_4P_2X 2, 3, 5, 6, 4 - - RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c deleted file mode 100644 index be1087c1e9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c +++ /dev/null @@ -1,979 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include /* AVX2 */ - -#include "./vpx_dsp_rtcd.h" -#include "vpx_ports/mem.h" - -void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - __m128i mask, hev, flat, flat2; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; - __m128i abs_p1p0; - - const __m128i thresh = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _thresh[0])); - const __m128i limit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _limit[0])); - const __m128i blimit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _blimit[0])); - - q4p4 = _mm_loadl_epi64((__m128i *) (s - 5 * p)); - q4p4 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *) (s + 4 * p))); - q3p3 = _mm_loadl_epi64((__m128i *) (s - 4 * p)); - q3p3 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *) (s + 3 * p))); - q2p2 = _mm_loadl_epi64((__m128i *) (s - 3 * p)); - q2p2 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *) (s + 2 * p))); - q1p1 = _mm_loadl_epi64((__m128i *) (s - 2 * p)); - q1p1 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *) (s + 1 * p))); - p1q1 = _mm_shuffle_epi32(q1p1, 78); - q0p0 = _mm_loadl_epi64((__m128i *) (s - 1 * p)); - q0p0 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *) (s - 0 * p))); - p0q0 = _mm_shuffle_epi32(q0p0, 78); - - { - __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; - abs_p1p0 = _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), - _mm_subs_epu8(q0p0, q1p1)); - abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - fe = _mm_set1_epi8(0xfe); - ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - abs_p0q0 = _mm_or_si128(_mm_subs_epu8(q0p0, p0q0), - _mm_subs_epu8(p0q0, q0p0)); - abs_p1q1 = _mm_or_si128(_mm_subs_epu8(q1p1, p1q1), - _mm_subs_epu8(p1q1, q1p1)); - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q2p2, q1p1), - _mm_subs_epu8(q1p1, q2p2)), - _mm_or_si128(_mm_subs_epu8(q3p3, q2p2), - _mm_subs_epu8(q2p2, q3p3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i t1 = _mm_set1_epi16(0x1); - __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); - __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); - __m128i qs0 = _mm_xor_si128(p0q0, t80); - __m128i qs1 = _mm_xor_si128(p1q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; - __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; - - filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, qs0ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - filter1 = _mm_unpacklo_epi8(zero, filter1); - filter1 = _mm_srai_epi16(filter1, 0xB); - filter2 = _mm_unpacklo_epi8(zero, filter2); - filter2 = _mm_srai_epi16(filter2, 0xB); - - /* Filter1 >> 3 */ - filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); - qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); - - /* filt >> 1 */ - filt = _mm_adds_epi16(filter1, t1); - filt = _mm_srai_epi16(filt, 1); - filt = _mm_andnot_si128( - _mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), filt); - filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); - qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); - // loopfilter done - - { - __m128i work; - flat = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q2p2, q0p0), - _mm_subs_epu8(q0p0, q2p2)), - _mm_or_si128(_mm_subs_epu8(q3p3, q0p0), - _mm_subs_epu8(q0p0, q3p3))); - flat = _mm_max_epu8(abs_p1p0, flat); - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - - q5p5 = _mm_loadl_epi64((__m128i *) (s - 6 * p)); - q5p5 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q5p5), - (__m64 *) (s + 5 * p))); - - q6p6 = _mm_loadl_epi64((__m128i *) (s - 7 * p)); - q6p6 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q6p6), - (__m64 *) (s + 6 * p))); - - flat2 = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q4p4, q0p0), - _mm_subs_epu8(q0p0, q4p4)), - _mm_or_si128(_mm_subs_epu8(q5p5, q0p0), - _mm_subs_epu8(q0p0, q5p5))); - - q7p7 = _mm_loadl_epi64((__m128i *) (s - 8 * p)); - q7p7 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q7p7), - (__m64 *) (s + 7 * p))); - - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q6p6, q0p0), - _mm_subs_epu8(q0p0, q6p6)), - _mm_or_si128(_mm_subs_epu8(q7p7, q0p0), - _mm_subs_epu8(q0p0, q7p7))); - - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - { - const __m128i eight = _mm_set1_epi16(8); - const __m128i four = _mm_set1_epi16(4); - __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; - __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; - __m128i pixelFilter_p, pixelFilter_q; - __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; - __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; - - p7_16 = _mm_unpacklo_epi8(q7p7, zero); - p6_16 = _mm_unpacklo_epi8(q6p6, zero); - p5_16 = _mm_unpacklo_epi8(q5p5, zero); - p4_16 = _mm_unpacklo_epi8(q4p4, zero); - p3_16 = _mm_unpacklo_epi8(q3p3, zero); - p2_16 = _mm_unpacklo_epi8(q2p2, zero); - p1_16 = _mm_unpacklo_epi8(q1p1, zero); - p0_16 = _mm_unpacklo_epi8(q0p0, zero); - q0_16 = _mm_unpackhi_epi8(q0p0, zero); - q1_16 = _mm_unpackhi_epi8(q1p1, zero); - q2_16 = _mm_unpackhi_epi8(q2p2, zero); - q3_16 = _mm_unpackhi_epi8(q3p3, zero); - q4_16 = _mm_unpackhi_epi8(q4p4, zero); - q5_16 = _mm_unpackhi_epi8(q5p5, zero); - q6_16 = _mm_unpackhi_epi8(q6p6, zero); - q7_16 = _mm_unpackhi_epi8(q7p7, zero); - - pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), - _mm_add_epi16(p4_16, p3_16)); - pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), - _mm_add_epi16(q4_16, q3_16)); - - pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, - _mm_add_epi16(p2_16, p1_16)); - pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, - _mm_add_epi16(q2_16, q1_16)); - pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); - pixelFilter_p = _mm_add_epi16(eight, - _mm_add_epi16(pixelFilter_p, pixelFilter_q)); - pixetFilter_p2p1p0 = _mm_add_epi16(four, - _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7_16, p0_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7_16, q0_16)), - 4); - flat2_q0p0 = _mm_packus_epi16(res_p, res_q); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(p3_16, p0_16)), 3); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(q3_16, q0_16)), 3); - - flat_q0p0 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(p7_16, p7_16); - sum_q7 = _mm_add_epi16(q7_16, q7_16); - sum_p3 = _mm_add_epi16(p3_16, p3_16); - sum_q3 = _mm_add_epi16(q3_16, q3_16); - - pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1_16)), - 4); - flat2_q1p1 = _mm_packus_epi16(res_p, res_q); - - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p1_16)), 3); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q1_16)), 3); - flat_q1p1 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - sum_p3 = _mm_add_epi16(sum_p3, p3_16); - sum_q3 = _mm_add_epi16(sum_q3, q3_16); - - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2_16)), - 4); - flat2_q2p2 = _mm_packus_epi16(res_p, res_q); - - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); - - res_p = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p2_16)), 3); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q2_16)), 3); - flat_q2p2 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3_16)), - 4); - flat2_q3p3 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4_16)), - 4); - flat2_q4p4 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5_16)), - 4); - flat2_q5p5 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6_16)), - 4); - flat2_q6p6 = _mm_packus_epi16(res_p, res_q); - } - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - flat = _mm_shuffle_epi32(flat, 68); - flat2 = _mm_shuffle_epi32(flat2, 68); - - q2p2 = _mm_andnot_si128(flat, q2p2); - flat_q2p2 = _mm_and_si128(flat, flat_q2p2); - q2p2 = _mm_or_si128(q2p2, flat_q2p2); - - qs1ps1 = _mm_andnot_si128(flat, qs1ps1); - flat_q1p1 = _mm_and_si128(flat, flat_q1p1); - q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); - - qs0ps0 = _mm_andnot_si128(flat, qs0ps0); - flat_q0p0 = _mm_and_si128(flat, flat_q0p0); - q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); - - q6p6 = _mm_andnot_si128(flat2, q6p6); - flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); - q6p6 = _mm_or_si128(q6p6, flat2_q6p6); - _mm_storel_epi64((__m128i *) (s - 7 * p), q6p6); - _mm_storeh_pi((__m64 *) (s + 6 * p), _mm_castsi128_ps(q6p6)); - - q5p5 = _mm_andnot_si128(flat2, q5p5); - flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); - q5p5 = _mm_or_si128(q5p5, flat2_q5p5); - _mm_storel_epi64((__m128i *) (s - 6 * p), q5p5); - _mm_storeh_pi((__m64 *) (s + 5 * p), _mm_castsi128_ps(q5p5)); - - q4p4 = _mm_andnot_si128(flat2, q4p4); - flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); - q4p4 = _mm_or_si128(q4p4, flat2_q4p4); - _mm_storel_epi64((__m128i *) (s - 5 * p), q4p4); - _mm_storeh_pi((__m64 *) (s + 4 * p), _mm_castsi128_ps(q4p4)); - - q3p3 = _mm_andnot_si128(flat2, q3p3); - flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); - q3p3 = _mm_or_si128(q3p3, flat2_q3p3); - _mm_storel_epi64((__m128i *) (s - 4 * p), q3p3); - _mm_storeh_pi((__m64 *) (s + 3 * p), _mm_castsi128_ps(q3p3)); - - q2p2 = _mm_andnot_si128(flat2, q2p2); - flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); - q2p2 = _mm_or_si128(q2p2, flat2_q2p2); - _mm_storel_epi64((__m128i *) (s - 3 * p), q2p2); - _mm_storeh_pi((__m64 *) (s + 2 * p), _mm_castsi128_ps(q2p2)); - - q1p1 = _mm_andnot_si128(flat2, q1p1); - flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); - q1p1 = _mm_or_si128(q1p1, flat2_q1p1); - _mm_storel_epi64((__m128i *) (s - 2 * p), q1p1); - _mm_storeh_pi((__m64 *) (s + 1 * p), _mm_castsi128_ps(q1p1)); - - q0p0 = _mm_andnot_si128(flat2, q0p0); - flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); - q0p0 = _mm_or_si128(q0p0, flat2_q0p0); - _mm_storel_epi64((__m128i *) (s - 1 * p), q0p0); - _mm_storeh_pi((__m64 *) (s - 0 * p), _mm_castsi128_ps(q0p0)); - } -} - -DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { - 0, 128, 1, 128, 2, 128, 3, 128, 4, 128, 5, 128, 6, 128, 7, 128, - 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 -}; - -void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - __m128i mask, hev, flat, flat2; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - __m128i p7, p6, p5; - __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; - __m128i q5, q6, q7; - __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, - q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, - p256_0, q256_0; - - const __m128i thresh = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _thresh[0])); - const __m128i limit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _limit[0])); - const __m128i blimit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _blimit[0])); - - p256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 5 * p))); - p256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 4 * p))); - p256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 3 * p))); - p256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 2 * p))); - p256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 1 * p))); - q256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 0 * p))); - q256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 1 * p))); - q256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 2 * p))); - q256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 3 * p))); - q256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 4 * p))); - - p4 = _mm256_castsi256_si128(p256_4); - p3 = _mm256_castsi256_si128(p256_3); - p2 = _mm256_castsi256_si128(p256_2); - p1 = _mm256_castsi256_si128(p256_1); - p0 = _mm256_castsi256_si128(p256_0); - q0 = _mm256_castsi256_si128(q256_0); - q1 = _mm256_castsi256_si128(q256_1); - q2 = _mm256_castsi256_si128(q256_2); - q3 = _mm256_castsi256_si128(q256_3); - q4 = _mm256_castsi256_si128(q256_4); - - { - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), - _mm_subs_epu8(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), - _mm_subs_epu8(q0, q1)); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), - _mm_subs_epu8(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), - _mm_subs_epu8(q1, p1)); - __m128i work; - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p2, p1), _mm_subs_epu8(p1, p2)), - _mm_or_si128(_mm_subs_epu8(p3, p2), _mm_subs_epu8(p2, p3))); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), - _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - - __m128i ps1 = _mm_xor_si128(p1, t80); - __m128i ps0 = _mm_xor_si128(p0, t80); - __m128i qs0 = _mm_xor_si128(q0, t80); - __m128i qs1 = _mm_xor_si128(q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - __m128i flat2_p6, flat2_p5, flat2_p4, flat2_p3, flat2_p2, flat2_p1, - flat2_p0, flat2_q0, flat2_q1, flat2_q2, flat2_q3, flat2_q4, - flat2_q5, flat2_q6, flat_p2, flat_p1, flat_p0, flat_q0, flat_q1, - flat_q2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - /* Filter1 >> 3 */ - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - - /* Filter2 >> 3 */ - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - - /* filt >> 1 */ - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - filt = _mm_andnot_si128(hev, filt); - ps1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - qs1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - // loopfilter done - - { - __m128i work; - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p2, p0), _mm_subs_epu8(p0, p2)), - _mm_or_si128(_mm_subs_epu8(q2, q0), _mm_subs_epu8(q0, q2))); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p3, p0), _mm_subs_epu8(p0, p3)), - _mm_or_si128(_mm_subs_epu8(q3, q0), _mm_subs_epu8(q0, q3))); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p4, p0), _mm_subs_epu8(p0, p4)), - _mm_or_si128(_mm_subs_epu8(q4, q0), _mm_subs_epu8(q0, q4))); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - - p256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 6 * p))); - q256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 5 * p))); - p5 = _mm256_castsi256_si128(p256_5); - q5 = _mm256_castsi256_si128(q256_5); - flat2 = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p5, p0), _mm_subs_epu8(p0, p5)), - _mm_or_si128(_mm_subs_epu8(q5, q0), _mm_subs_epu8(q0, q5))); - - flat2 = _mm_max_epu8(work, flat2); - p256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 7 * p))); - q256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 6 * p))); - p6 = _mm256_castsi256_si128(p256_6); - q6 = _mm256_castsi256_si128(q256_6); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p6, p0), _mm_subs_epu8(p0, p6)), - _mm_or_si128(_mm_subs_epu8(q6, q0), _mm_subs_epu8(q0, q6))); - - flat2 = _mm_max_epu8(work, flat2); - - p256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 8 * p))); - q256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 7 * p))); - p7 = _mm256_castsi256_si128(p256_7); - q7 = _mm256_castsi256_si128(q256_7); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p7, p0), _mm_subs_epu8(p0, p7)), - _mm_or_si128(_mm_subs_epu8(q7, q0), _mm_subs_epu8(q0, q7))); - - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - { - const __m256i eight = _mm256_set1_epi16(8); - const __m256i four = _mm256_set1_epi16(4); - __m256i pixelFilter_p, pixelFilter_q, pixetFilter_p2p1p0, - pixetFilter_q2q1q0, sum_p7, sum_q7, sum_p3, sum_q3, res_p, - res_q; - - const __m256i filter = _mm256_load_si256( - (__m256i const *)filt_loopfilter_avx2); - p256_7 = _mm256_shuffle_epi8(p256_7, filter); - p256_6 = _mm256_shuffle_epi8(p256_6, filter); - p256_5 = _mm256_shuffle_epi8(p256_5, filter); - p256_4 = _mm256_shuffle_epi8(p256_4, filter); - p256_3 = _mm256_shuffle_epi8(p256_3, filter); - p256_2 = _mm256_shuffle_epi8(p256_2, filter); - p256_1 = _mm256_shuffle_epi8(p256_1, filter); - p256_0 = _mm256_shuffle_epi8(p256_0, filter); - q256_0 = _mm256_shuffle_epi8(q256_0, filter); - q256_1 = _mm256_shuffle_epi8(q256_1, filter); - q256_2 = _mm256_shuffle_epi8(q256_2, filter); - q256_3 = _mm256_shuffle_epi8(q256_3, filter); - q256_4 = _mm256_shuffle_epi8(q256_4, filter); - q256_5 = _mm256_shuffle_epi8(q256_5, filter); - q256_6 = _mm256_shuffle_epi8(q256_6, filter); - q256_7 = _mm256_shuffle_epi8(q256_7, filter); - - pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5), - _mm256_add_epi16(p256_4, p256_3)); - pixelFilter_q = _mm256_add_epi16(_mm256_add_epi16(q256_6, q256_5), - _mm256_add_epi16(q256_4, q256_3)); - - pixetFilter_p2p1p0 = _mm256_add_epi16(p256_0, - _mm256_add_epi16(p256_2, p256_1)); - pixelFilter_p = _mm256_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm256_add_epi16(q256_0, - _mm256_add_epi16(q256_2, q256_1)); - pixelFilter_q = _mm256_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); - - pixelFilter_p = _mm256_add_epi16(eight, - _mm256_add_epi16(pixelFilter_p, pixelFilter_q)); - - pixetFilter_p2p1p0 = _mm256_add_epi16(four, - _mm256_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(p256_7, p256_0)), 4); - - flat2_p0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(q256_7, q256_0)), 4); - - flat2_q0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(p256_3, p256_0)), 3); - - flat_p0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(q256_3, q256_0)), 3); - - flat_q0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(p256_7, p256_7); - - sum_q7 = _mm256_add_epi16(q256_7, q256_7); - - sum_p3 = _mm256_add_epi16(p256_3, p256_3); - - sum_q3 = _mm256_add_epi16(q256_3, q256_3); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_p, p256_6); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_6); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_1)), 4); - - flat2_p1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_1)), 4); - - flat2_q1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_p2p1p0, p256_2); - - pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_2); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(sum_p3, p256_1)), 3); - - flat_p1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_q2q1q0, - _mm256_add_epi16(sum_q3, q256_1)), 3); - - flat_q1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - sum_p3 = _mm256_add_epi16(sum_p3, p256_3); - - sum_q3 = _mm256_add_epi16(sum_q3, q256_3); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_5); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_5); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_2)), 4); - - flat2_p2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_2)), 4); - - flat2_q2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_1); - - pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_q2q1q0, p256_1); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(sum_p3, p256_2)), 3); - - flat_p2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_q2q1q0, - _mm256_add_epi16(sum_q3, q256_2)), 3); - - flat_q2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_4); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_4); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_3)), 4); - - flat2_p3 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_3)), 4); - - flat2_q3 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_3); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_3); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_4)), 4); - - flat2_p4 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_4)), 4); - - flat2_q4 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_2); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_2); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_5)), 4); - - flat2_p5 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_5)), 4); - - flat2_q5 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_1); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_1); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_6)), 4); - - flat2_p6 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_6)), 4); - - flat2_q6 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - } - - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - p2 = _mm_andnot_si128(flat, p2); - flat_p2 = _mm_and_si128(flat, flat_p2); - p2 = _mm_or_si128(flat_p2, p2); - - p1 = _mm_andnot_si128(flat, ps1); - flat_p1 = _mm_and_si128(flat, flat_p1); - p1 = _mm_or_si128(flat_p1, p1); - - p0 = _mm_andnot_si128(flat, ps0); - flat_p0 = _mm_and_si128(flat, flat_p0); - p0 = _mm_or_si128(flat_p0, p0); - - q0 = _mm_andnot_si128(flat, qs0); - flat_q0 = _mm_and_si128(flat, flat_q0); - q0 = _mm_or_si128(flat_q0, q0); - - q1 = _mm_andnot_si128(flat, qs1); - flat_q1 = _mm_and_si128(flat, flat_q1); - q1 = _mm_or_si128(flat_q1, q1); - - q2 = _mm_andnot_si128(flat, q2); - flat_q2 = _mm_and_si128(flat, flat_q2); - q2 = _mm_or_si128(flat_q2, q2); - - p6 = _mm_andnot_si128(flat2, p6); - flat2_p6 = _mm_and_si128(flat2, flat2_p6); - p6 = _mm_or_si128(flat2_p6, p6); - _mm_storeu_si128((__m128i *) (s - 7 * p), p6); - - p5 = _mm_andnot_si128(flat2, p5); - flat2_p5 = _mm_and_si128(flat2, flat2_p5); - p5 = _mm_or_si128(flat2_p5, p5); - _mm_storeu_si128((__m128i *) (s - 6 * p), p5); - - p4 = _mm_andnot_si128(flat2, p4); - flat2_p4 = _mm_and_si128(flat2, flat2_p4); - p4 = _mm_or_si128(flat2_p4, p4); - _mm_storeu_si128((__m128i *) (s - 5 * p), p4); - - p3 = _mm_andnot_si128(flat2, p3); - flat2_p3 = _mm_and_si128(flat2, flat2_p3); - p3 = _mm_or_si128(flat2_p3, p3); - _mm_storeu_si128((__m128i *) (s - 4 * p), p3); - - p2 = _mm_andnot_si128(flat2, p2); - flat2_p2 = _mm_and_si128(flat2, flat2_p2); - p2 = _mm_or_si128(flat2_p2, p2); - _mm_storeu_si128((__m128i *) (s - 3 * p), p2); - - p1 = _mm_andnot_si128(flat2, p1); - flat2_p1 = _mm_and_si128(flat2, flat2_p1); - p1 = _mm_or_si128(flat2_p1, p1); - _mm_storeu_si128((__m128i *) (s - 2 * p), p1); - - p0 = _mm_andnot_si128(flat2, p0); - flat2_p0 = _mm_and_si128(flat2, flat2_p0); - p0 = _mm_or_si128(flat2_p0, p0); - _mm_storeu_si128((__m128i *) (s - 1 * p), p0); - - q0 = _mm_andnot_si128(flat2, q0); - flat2_q0 = _mm_and_si128(flat2, flat2_q0); - q0 = _mm_or_si128(flat2_q0, q0); - _mm_storeu_si128((__m128i *) (s - 0 * p), q0); - - q1 = _mm_andnot_si128(flat2, q1); - flat2_q1 = _mm_and_si128(flat2, flat2_q1); - q1 = _mm_or_si128(flat2_q1, q1); - _mm_storeu_si128((__m128i *) (s + 1 * p), q1); - - q2 = _mm_andnot_si128(flat2, q2); - flat2_q2 = _mm_and_si128(flat2, flat2_q2); - q2 = _mm_or_si128(flat2_q2, q2); - _mm_storeu_si128((__m128i *) (s + 2 * p), q2); - - q3 = _mm_andnot_si128(flat2, q3); - flat2_q3 = _mm_and_si128(flat2, flat2_q3); - q3 = _mm_or_si128(flat2_q3, q3); - _mm_storeu_si128((__m128i *) (s + 3 * p), q3); - - q4 = _mm_andnot_si128(flat2, q4); - flat2_q4 = _mm_and_si128(flat2, flat2_q4); - q4 = _mm_or_si128(flat2_q4, q4); - _mm_storeu_si128((__m128i *) (s + 4 * p), q4); - - q5 = _mm_andnot_si128(flat2, q5); - flat2_q5 = _mm_and_si128(flat2, flat2_q5); - q5 = _mm_or_si128(flat2_q5, q5); - _mm_storeu_si128((__m128i *) (s + 5 * p), q5); - - q6 = _mm_andnot_si128(flat2, q6); - flat2_q6 = _mm_and_si128(flat2, flat2_q6); - q6 = _mm_or_si128(flat2_q6, q6); - _mm_storeu_si128((__m128i *) (s + 6 * p), q6); - } -} diff --git a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c deleted file mode 100644 index 739adf31d0..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c +++ /dev/null @@ -1,1776 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include // SSE2 - -#include "./vpx_dsp_rtcd.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/emmintrin_compat.h" - -static INLINE __m128i abs_diff(__m128i a, __m128i b) { - return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); -} - -// filter_mask and hev_mask -#define FILTER_HEV_MASK do { \ - /* (abs(q1 - q0), abs(p1 - p0) */ \ - __m128i flat = abs_diff(q1p1, q0p0); \ - /* abs(p1 - q1), abs(p0 - q0) */ \ - const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0); \ - __m128i abs_p0q0, abs_p1q1, work; \ - \ - /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \ - hev = _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \ - hev = _mm_cmpgt_epi16(hev, thresh); \ - hev = _mm_packs_epi16(hev, hev); \ - \ - /* const int8_t mask = filter_mask(*limit, *blimit, */ \ - /* p3, p2, p1, p0, q0, q1, q2, q3); */ \ - abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */\ - abs_p1q1 = _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */\ - abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); \ - abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */ \ - /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \ - mask = _mm_adds_epu8(abs_p0q0, abs_p1q1); \ - /* abs(p3 - p2), abs(p2 - p1) */ \ - work = abs_diff(p3p2, p2p1); \ - flat = _mm_max_epu8(work, flat); \ - /* abs(q3 - q2), abs(q2 - q1) */ \ - work = abs_diff(q3q2, q2q1); \ - flat = _mm_max_epu8(work, flat); \ - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \ - mask = _mm_unpacklo_epi64(mask, flat); \ - mask = _mm_subs_epu8(mask, limit); \ - mask = _mm_cmpeq_epi8(mask, zero); \ - mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \ -} while (0) - -#define FILTER4 do { \ - const __m128i t3t4 = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, \ - 4, 4, 4, 4, 4, 4, 4, 4); \ - const __m128i t80 = _mm_set1_epi8(0x80); \ - __m128i filter, filter2filter1, work; \ - \ - ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \ - qs1qs0 = _mm_xor_si128(q1q0, t80); \ - \ - /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */ \ - work = _mm_subs_epi8(ps1ps0, qs1qs0); \ - filter = _mm_and_si128(_mm_srli_si128(work, 8), hev); \ - /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */ \ - filter = _mm_subs_epi8(filter, work); \ - filter = _mm_subs_epi8(filter, work); \ - filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */ \ - filter = _mm_and_si128(filter, mask); /* & mask */ \ - filter = _mm_unpacklo_epi64(filter, filter); \ - \ - /* filter1 = signed_char_clamp(filter + 4) >> 3; */ \ - /* filter2 = signed_char_clamp(filter + 3) >> 3; */ \ - filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */ \ - filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); \ - filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); \ - filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */ \ - filter = _mm_srai_epi16(filter, 11); /* >> 3 */ \ - filter2filter1 = _mm_packs_epi16(filter2filter1, filter); \ - \ - /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */ \ - filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */ \ - filter = _mm_unpacklo_epi8(filter, filter); \ - filter = _mm_srai_epi16(filter, 9); /* round */ \ - filter = _mm_packs_epi16(filter, filter); \ - filter = _mm_andnot_si128(hev, filter); \ - \ - hev = _mm_unpackhi_epi64(filter2filter1, filter); \ - filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); \ - \ - /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */ \ - qs1qs0 = _mm_subs_epi8(qs1qs0, filter2filter1); \ - /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */ \ - ps1ps0 = _mm_adds_epi8(ps1ps0, hev); \ - qs1qs0 = _mm_xor_si128(qs1qs0, t80); /* ^ 0x80 */ \ - ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \ -} while (0) - -void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */, - const uint8_t *_blimit, const uint8_t *_limit, - const uint8_t *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i limit = - _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), - _mm_loadl_epi64((const __m128i *)_limit)); - const __m128i thresh = - _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); - const __m128i ff = _mm_cmpeq_epi8(zero, zero); - __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; - __m128i mask, hev; - - p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), - _mm_loadl_epi64((__m128i *)(s - 4 * p))); - q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 1 * p))); - q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - _mm_loadl_epi64((__m128i *)(s + 0 * p))); - q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 3 * p))); - p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); - p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); - q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); - q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); - - FILTER_HEV_MASK; - FILTER4; - - _mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1 - _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0 - _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0 - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1 -} - -void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, - const uint8_t *_blimit, const uint8_t *_limit, - const uint8_t *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i limit = - _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), - _mm_loadl_epi64((const __m128i *)_limit)); - const __m128i thresh = - _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); - const __m128i ff = _mm_cmpeq_epi8(zero, zero); - __m128i x0, x1, x2, x3; - __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; - __m128i mask, hev; - - // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 - q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 1 * p - 4))); - - // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 - x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 3 * p - 4))); - - // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 - x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 5 * p - 4))); - - // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 - x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 7 * p - 4))); - - // Transpose 8x8 - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - p1p0 = _mm_unpacklo_epi16(q1q0, x1); - // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 - x0 = _mm_unpacklo_epi16(x2, x3); - // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 - p3p2 = _mm_unpacklo_epi32(p1p0, x0); - // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 - p1p0 = _mm_unpackhi_epi32(p1p0, x0); - p3p2 = _mm_unpackhi_epi64(p3p2, _mm_slli_si128(p3p2, 8)); // swap lo and high - p1p0 = _mm_unpackhi_epi64(p1p0, _mm_slli_si128(p1p0, 8)); // swap lo and high - - // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 - q1q0 = _mm_unpackhi_epi16(q1q0, x1); - // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 - x2 = _mm_unpackhi_epi16(x2, x3); - // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 - q3q2 = _mm_unpackhi_epi32(q1q0, x2); - // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 - q1q0 = _mm_unpacklo_epi32(q1q0, x2); - - q0p0 = _mm_unpacklo_epi64(p1p0, q1q0); - q1p1 = _mm_unpackhi_epi64(p1p0, q1q0); - p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); - p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); - q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); - - FILTER_HEV_MASK; - FILTER4; - - // Transpose 8x4 to 4x8 - // qs1qs0: 20 21 22 23 24 25 26 27 30 31 32 33 34 34 36 37 - // ps1ps0: 10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07 - // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17 - ps1ps0 = _mm_unpackhi_epi64(ps1ps0, _mm_slli_si128(ps1ps0, 8)); - // 10 30 11 31 12 32 13 33 14 34 15 35 16 36 17 37 - x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0); - // 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27 - ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0); - // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 - qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0); - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); - - *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - - *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0); - qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0); - qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0); - qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); -} - -void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); - __m128i mask, hev, flat, flat2; - __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; - __m128i abs_p1p0; - - q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * p)); - q4p4 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q4p4), - (__m64 *)(s + 4 * p))); - q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p)); - q3p3 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q3p3), - (__m64 *)(s + 3 * p))); - q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p)); - q2p2 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q2p2), - (__m64 *)(s + 2 * p))); - q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p)); - q1p1 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q1p1), - (__m64 *)(s + 1 * p))); - p1q1 = _mm_shuffle_epi32(q1p1, 78); - q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p)); - q0p0 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q0p0), - (__m64 *)(s - 0 * p))); - p0q0 = _mm_shuffle_epi32(q0p0, 78); - - { - __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; - abs_p1p0 = abs_diff(q1p1, q0p0); - abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - fe = _mm_set1_epi8(0xfe); - ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - abs_p0q0 = abs_diff(q0p0, p0q0); - abs_p1q1 = abs_diff(q1p1, p1q1); - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epu8(abs_diff(q2p2, q1p1), - abs_diff(q3p3, q2p2)); - mask = _mm_max_epu8(work, mask); - mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i t1 = _mm_set1_epi16(0x1); - __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); - __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); - __m128i qs0 = _mm_xor_si128(p0q0, t80); - __m128i qs1 = _mm_xor_si128(p1q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; - __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; - - filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, qs0ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - filter1 = _mm_unpacklo_epi8(zero, filter1); - filter1 = _mm_srai_epi16(filter1, 0xB); - filter2 = _mm_unpacklo_epi8(zero, filter2); - filter2 = _mm_srai_epi16(filter2, 0xB); - - // Filter1 >> 3 - filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); - qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); - - // filt >> 1 - filt = _mm_adds_epi16(filter1, t1); - filt = _mm_srai_epi16(filt, 1); - filt = _mm_andnot_si128(_mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), - filt); - filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); - qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); - // loopfilter done - - { - __m128i work; - flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); - flat = _mm_max_epu8(abs_p1p0, flat); - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - - q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * p)); - q5p5 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q5p5), - (__m64 *)(s + 5 * p))); - - q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * p)); - q6p6 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q6p6), - (__m64 *)(s + 6 * p))); - flat2 = _mm_max_epu8(abs_diff(q4p4, q0p0), abs_diff(q5p5, q0p0)); - - q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * p)); - q7p7 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q7p7), - (__m64 *)(s + 7 * p))); - work = _mm_max_epu8(abs_diff(q6p6, q0p0), abs_diff(q7p7, q0p0)); - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - { - const __m128i eight = _mm_set1_epi16(8); - const __m128i four = _mm_set1_epi16(4); - __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; - __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; - __m128i pixelFilter_p, pixelFilter_q; - __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; - __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; - - p7_16 = _mm_unpacklo_epi8(q7p7, zero);; - p6_16 = _mm_unpacklo_epi8(q6p6, zero); - p5_16 = _mm_unpacklo_epi8(q5p5, zero); - p4_16 = _mm_unpacklo_epi8(q4p4, zero); - p3_16 = _mm_unpacklo_epi8(q3p3, zero); - p2_16 = _mm_unpacklo_epi8(q2p2, zero); - p1_16 = _mm_unpacklo_epi8(q1p1, zero); - p0_16 = _mm_unpacklo_epi8(q0p0, zero); - q0_16 = _mm_unpackhi_epi8(q0p0, zero); - q1_16 = _mm_unpackhi_epi8(q1p1, zero); - q2_16 = _mm_unpackhi_epi8(q2p2, zero); - q3_16 = _mm_unpackhi_epi8(q3p3, zero); - q4_16 = _mm_unpackhi_epi8(q4p4, zero); - q5_16 = _mm_unpackhi_epi8(q5p5, zero); - q6_16 = _mm_unpackhi_epi8(q6p6, zero); - q7_16 = _mm_unpackhi_epi8(q7p7, zero); - - pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), - _mm_add_epi16(p4_16, p3_16)); - pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), - _mm_add_epi16(q4_16, q3_16)); - - pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); - pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); - pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); - pixelFilter_p = _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, - pixelFilter_q)); - pixetFilter_p2p1p0 = _mm_add_epi16(four, - _mm_add_epi16(pixetFilter_p2p1p0, - pixetFilter_q2q1q0)); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(p7_16, p0_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(q7_16, q0_16)), 4); - flat2_q0p0 = _mm_packus_epi16(res_p, res_q); - res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(p3_16, p0_16)), 3); - res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(q3_16, q0_16)), 3); - - flat_q0p0 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(p7_16, p7_16); - sum_q7 = _mm_add_epi16(q7_16, q7_16); - sum_p3 = _mm_add_epi16(p3_16, p3_16); - sum_q3 = _mm_add_epi16(q3_16, q3_16); - - pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p1_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q1_16)), 4); - flat2_q1p1 = _mm_packus_epi16(res_p, res_q); - - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p1_16)), 3); - res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q1_16)), 3); - flat_q1p1 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - sum_p3 = _mm_add_epi16(sum_p3, p3_16); - sum_q3 = _mm_add_epi16(sum_q3, q3_16); - - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p2_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q2_16)), 4); - flat2_q2p2 = _mm_packus_epi16(res_p, res_q); - - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); - - res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p2_16)), 3); - res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q2_16)), 3); - flat_q2p2 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p3_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q3_16)), 4); - flat2_q3p3 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p4_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q4_16)), 4); - flat2_q4p4 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p5_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q5_16)), 4); - flat2_q5p5 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p6_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q6_16)), 4); - flat2_q6p6 = _mm_packus_epi16(res_p, res_q); - } - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - flat = _mm_shuffle_epi32(flat, 68); - flat2 = _mm_shuffle_epi32(flat2, 68); - - q2p2 = _mm_andnot_si128(flat, q2p2); - flat_q2p2 = _mm_and_si128(flat, flat_q2p2); - q2p2 = _mm_or_si128(q2p2, flat_q2p2); - - qs1ps1 = _mm_andnot_si128(flat, qs1ps1); - flat_q1p1 = _mm_and_si128(flat, flat_q1p1); - q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); - - qs0ps0 = _mm_andnot_si128(flat, qs0ps0); - flat_q0p0 = _mm_and_si128(flat, flat_q0p0); - q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); - - q6p6 = _mm_andnot_si128(flat2, q6p6); - flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); - q6p6 = _mm_or_si128(q6p6, flat2_q6p6); - _mm_storel_epi64((__m128i *)(s - 7 * p), q6p6); - _mm_storeh_pi((__m64 *)(s + 6 * p), _mm_castsi128_ps(q6p6)); - - q5p5 = _mm_andnot_si128(flat2, q5p5); - flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); - q5p5 = _mm_or_si128(q5p5, flat2_q5p5); - _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5); - _mm_storeh_pi((__m64 *)(s + 5 * p), _mm_castsi128_ps(q5p5)); - - q4p4 = _mm_andnot_si128(flat2, q4p4); - flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); - q4p4 = _mm_or_si128(q4p4, flat2_q4p4); - _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4); - _mm_storeh_pi((__m64 *)(s + 4 * p), _mm_castsi128_ps(q4p4)); - - q3p3 = _mm_andnot_si128(flat2, q3p3); - flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); - q3p3 = _mm_or_si128(q3p3, flat2_q3p3); - _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3); - _mm_storeh_pi((__m64 *)(s + 3 * p), _mm_castsi128_ps(q3p3)); - - q2p2 = _mm_andnot_si128(flat2, q2p2); - flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); - q2p2 = _mm_or_si128(q2p2, flat2_q2p2); - _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2); - _mm_storeh_pi((__m64 *)(s + 2 * p), _mm_castsi128_ps(q2p2)); - - q1p1 = _mm_andnot_si128(flat2, q1p1); - flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); - q1p1 = _mm_or_si128(q1p1, flat2_q1p1); - _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1); - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(q1p1)); - - q0p0 = _mm_andnot_si128(flat2, q0p0); - flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); - q0p0 = _mm_or_si128(q0p0, flat2_q0p0); - _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0); - _mm_storeh_pi((__m64 *)(s - 0 * p), _mm_castsi128_ps(q0p0)); - } -} - -static INLINE __m128i filter_add2_sub2(const __m128i *const total, - const __m128i *const a1, - const __m128i *const a2, - const __m128i *const s1, - const __m128i *const s2) { - __m128i x = _mm_add_epi16(*a1, *total); - x = _mm_add_epi16(_mm_sub_epi16(x, _mm_add_epi16(*s1, *s2)), *a2); - return x; -} - -static INLINE __m128i filter8_mask(const __m128i *const flat, - const __m128i *const other_filt, - const __m128i *const f8_lo, - const __m128i *const f8_hi) { - const __m128i f8 = _mm_packus_epi16(_mm_srli_epi16(*f8_lo, 3), - _mm_srli_epi16(*f8_hi, 3)); - const __m128i result = _mm_and_si128(*flat, f8); - return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); -} - -static INLINE __m128i filter16_mask(const __m128i *const flat, - const __m128i *const other_filt, - const __m128i *const f_lo, - const __m128i *const f_hi) { - const __m128i f = _mm_packus_epi16(_mm_srli_epi16(*f_lo, 4), - _mm_srli_epi16(*f_hi, 4)); - const __m128i result = _mm_and_si128(*flat, f); - return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); -} - -void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); - __m128i mask, hev, flat, flat2; - __m128i p7, p6, p5; - __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; - __m128i q5, q6, q7; - - __m128i op2, op1, op0, oq0, oq1, oq2; - - __m128i max_abs_p1p0q1q0; - - p7 = _mm_loadu_si128((__m128i *)(s - 8 * p)); - p6 = _mm_loadu_si128((__m128i *)(s - 7 * p)); - p5 = _mm_loadu_si128((__m128i *)(s - 6 * p)); - p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - q4 = _mm_loadu_si128((__m128i *)(s + 4 * p)); - q5 = _mm_loadu_si128((__m128i *)(s + 5 * p)); - q6 = _mm_loadu_si128((__m128i *)(s + 6 * p)); - q7 = _mm_loadu_si128((__m128i *)(s + 7 * p)); - - { - const __m128i abs_p1p0 = abs_diff(p1, p0); - const __m128i abs_q1q0 = abs_diff(q1, q0); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(zero, zero); - __m128i abs_p0q0 = abs_diff(p0, q0); - __m128i abs_p1q1 = abs_diff(p1, q1); - __m128i work; - max_abs_p1p0q1q0 = _mm_max_epu8(abs_p1p0, abs_q1q0); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(max_abs_p1p0q1q0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8(abs_diff(p2, p1), abs_diff(p3, p2)); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8(abs_diff(q2, q1), abs_diff(q3, q2)); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - { - __m128i work; - work = _mm_max_epu8(abs_diff(p2, p0), abs_diff(q2, q0)); - flat = _mm_max_epu8(work, max_abs_p1p0q1q0); - work = _mm_max_epu8(abs_diff(p3, p0), abs_diff(q3, q0)); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8(abs_diff(p4, p0), abs_diff(q4, q0)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - flat2 = _mm_max_epu8(abs_diff(p5, p0), abs_diff(q5, q0)); - flat2 = _mm_max_epu8(work, flat2); - work = _mm_max_epu8(abs_diff(p6, p0), abs_diff(q6, q0)); - flat2 = _mm_max_epu8(work, flat2); - work = _mm_max_epu8(abs_diff(p7, p0), abs_diff(q7, q0)); - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // filter4 - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - const __m128i ff = _mm_cmpeq_epi8(t4, t4); - - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - op1 = _mm_xor_si128(p1, t80); - op0 = _mm_xor_si128(p0, t80); - oq0 = _mm_xor_si128(q0, t80); - oq1 = _mm_xor_si128(q1, t80); - - hev = _mm_subs_epu8(max_abs_p1p0q1q0, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - filt = _mm_and_si128(_mm_subs_epi8(op1, oq1), hev); - - work_a = _mm_subs_epi8(oq0, op0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - oq0 = _mm_xor_si128(_mm_subs_epi8(oq0, filter1), t80); - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - op0 = _mm_xor_si128(_mm_adds_epi8(op0, filter2), t80); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - filt = _mm_andnot_si128(hev, filt); - op1 = _mm_xor_si128(_mm_adds_epi8(op1, filt), t80); - oq1 = _mm_xor_si128(_mm_subs_epi8(oq1, filt), t80); - // loopfilter done - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // filter8 - { - const __m128i four = _mm_set1_epi16(4); - const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); - const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); - const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); - const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); - const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); - const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); - const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); - const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); - - const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); - const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); - const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); - const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); - const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); - const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); - const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); - const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); - __m128i f8_lo, f8_hi; - - f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, four), - _mm_add_epi16(p3_lo, p2_lo)); - f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f8_lo), - _mm_add_epi16(p2_lo, p1_lo)); - f8_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f8_lo); - - f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, four), - _mm_add_epi16(p3_hi, p2_hi)); - f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f8_hi), - _mm_add_epi16(p2_hi, p1_hi)); - f8_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f8_hi); - - op2 = filter8_mask(&flat, &p2, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q1_lo, &p1_lo, &p2_lo, &p3_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q1_hi, &p1_hi, &p2_hi, &p3_hi); - op1 = filter8_mask(&flat, &op1, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q2_lo, &p0_lo, &p1_lo, &p3_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q2_hi, &p0_hi, &p1_hi, &p3_hi); - op0 = filter8_mask(&flat, &op0, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q0_lo, &p0_lo, &p3_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q0_hi, &p0_hi, &p3_hi); - oq0 = filter8_mask(&flat, &oq0, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q1_lo, &q0_lo, &p2_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q1_hi, &q0_hi, &p2_hi); - oq1 = filter8_mask(&flat, &oq1, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q2_lo, &q1_lo, &p1_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q2_hi, &q1_hi, &p1_hi); - oq2 = filter8_mask(&flat, &q2, &f8_lo, &f8_hi); - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // wide flat calculations - { - const __m128i eight = _mm_set1_epi16(8); - const __m128i p7_lo = _mm_unpacklo_epi8(p7, zero); - const __m128i p6_lo = _mm_unpacklo_epi8(p6, zero); - const __m128i p5_lo = _mm_unpacklo_epi8(p5, zero); - const __m128i p4_lo = _mm_unpacklo_epi8(p4, zero); - const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); - const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); - const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); - const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); - const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); - const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); - const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); - const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); - const __m128i q4_lo = _mm_unpacklo_epi8(q4, zero); - const __m128i q5_lo = _mm_unpacklo_epi8(q5, zero); - const __m128i q6_lo = _mm_unpacklo_epi8(q6, zero); - const __m128i q7_lo = _mm_unpacklo_epi8(q7, zero); - - const __m128i p7_hi = _mm_unpackhi_epi8(p7, zero); - const __m128i p6_hi = _mm_unpackhi_epi8(p6, zero); - const __m128i p5_hi = _mm_unpackhi_epi8(p5, zero); - const __m128i p4_hi = _mm_unpackhi_epi8(p4, zero); - const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); - const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); - const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); - const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); - const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); - const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); - const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); - const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); - const __m128i q4_hi = _mm_unpackhi_epi8(q4, zero); - const __m128i q5_hi = _mm_unpackhi_epi8(q5, zero); - const __m128i q6_hi = _mm_unpackhi_epi8(q6, zero); - const __m128i q7_hi = _mm_unpackhi_epi8(q7, zero); - - __m128i f_lo; - __m128i f_hi; - - f_lo = _mm_sub_epi16(_mm_slli_epi16(p7_lo, 3), p7_lo); // p7 * 7 - f_lo = _mm_add_epi16(_mm_slli_epi16(p6_lo, 1), - _mm_add_epi16(p4_lo, f_lo)); - f_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f_lo), - _mm_add_epi16(p2_lo, p1_lo)); - f_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f_lo); - f_lo = _mm_add_epi16(_mm_add_epi16(p5_lo, eight), f_lo); - - f_hi = _mm_sub_epi16(_mm_slli_epi16(p7_hi, 3), p7_hi); // p7 * 7 - f_hi = _mm_add_epi16(_mm_slli_epi16(p6_hi, 1), - _mm_add_epi16(p4_hi, f_hi)); - f_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f_hi), - _mm_add_epi16(p2_hi, p1_hi)); - f_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f_hi); - f_hi = _mm_add_epi16(_mm_add_epi16(p5_hi, eight), f_hi); - - p6 = filter16_mask(&flat2, &p6, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 7 * p), p6); - - f_lo = filter_add2_sub2(&f_lo, &q1_lo, &p5_lo, &p6_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q1_hi, &p5_hi, &p6_hi, &p7_hi); - p5 = filter16_mask(&flat2, &p5, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 6 * p), p5); - - f_lo = filter_add2_sub2(&f_lo, &q2_lo, &p4_lo, &p5_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q2_hi, &p4_hi, &p5_hi, &p7_hi); - p4 = filter16_mask(&flat2, &p4, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 5 * p), p4); - - f_lo = filter_add2_sub2(&f_lo, &q3_lo, &p3_lo, &p4_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q3_hi, &p3_hi, &p4_hi, &p7_hi); - p3 = filter16_mask(&flat2, &p3, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 4 * p), p3); - - f_lo = filter_add2_sub2(&f_lo, &q4_lo, &p2_lo, &p3_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q4_hi, &p2_hi, &p3_hi, &p7_hi); - op2 = filter16_mask(&flat2, &op2, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 3 * p), op2); - - f_lo = filter_add2_sub2(&f_lo, &q5_lo, &p1_lo, &p2_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q5_hi, &p1_hi, &p2_hi, &p7_hi); - op1 = filter16_mask(&flat2, &op1, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 2 * p), op1); - - f_lo = filter_add2_sub2(&f_lo, &q6_lo, &p0_lo, &p1_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q6_hi, &p0_hi, &p1_hi, &p7_hi); - op0 = filter16_mask(&flat2, &op0, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 1 * p), op0); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q0_lo, &p0_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q0_hi, &p0_hi, &p7_hi); - oq0 = filter16_mask(&flat2, &oq0, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 0 * p), oq0); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q1_lo, &p6_lo, &q0_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q1_hi, &p6_hi, &q0_hi); - oq1 = filter16_mask(&flat2, &oq1, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 1 * p), oq1); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q2_lo, &p5_lo, &q1_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q2_hi, &p5_hi, &q1_hi); - oq2 = filter16_mask(&flat2, &oq2, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 2 * p), oq2); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q3_lo, &p4_lo, &q2_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q3_hi, &p4_hi, &q2_hi); - q3 = filter16_mask(&flat2, &q3, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 3 * p), q3); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q4_lo, &p3_lo, &q3_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q4_hi, &p3_hi, &q3_hi); - q4 = filter16_mask(&flat2, &q4, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 4 * p), q4); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q5_lo, &p2_lo, &q4_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q5_hi, &p2_hi, &q4_hi); - q5 = filter16_mask(&flat2, &q5, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 5 * p), q5); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q6_lo, &p1_lo, &q5_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q6_hi, &p1_hi, &q5_hi); - q6 = filter16_mask(&flat2, &q6, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 6 * p), q6); - } - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - } -} - -void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); - const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); - __m128i mask, hev, flat; - __m128i p3, p2, p1, p0, q0, q1, q2, q3; - __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; - - q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)), - _mm_loadl_epi64((__m128i *)(s + 3 * p))); - q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), - _mm_loadl_epi64((__m128i *)(s + 2 * p))); - q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 1 * p))); - q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - _mm_loadl_epi64((__m128i *)(s - 0 * p))); - p1q1 = _mm_shuffle_epi32(q1p1, 78); - p0q0 = _mm_shuffle_epi32(q0p0, 78); - - { - // filter_mask and hev_mask - const __m128i one = _mm_set1_epi8(1); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(fe, fe); - __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; - abs_p1p0 = abs_diff(q1p1, q0p0); - abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - - abs_p0q0 = abs_diff(q0p0, p0q0); - abs_p1q1 = abs_diff(q1p1, p1q1); - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epu8(abs_diff(q2p2, q1p1), - abs_diff(q3p3, q2p2)); - mask = _mm_max_epu8(work, mask); - mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - - // flat_mask4 - - flat = _mm_max_epu8(abs_diff(q2p2, q0p0), - abs_diff(q3p3, q0p0)); - flat = _mm_max_epu8(abs_p1p0, flat); - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - } - - { - const __m128i four = _mm_set1_epi16(4); - unsigned char *src = s; - { - __m128i workp_a, workp_b, workp_shft; - p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); - p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); - p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); - p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); - q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); - q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); - q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); - q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); - - workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); - workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); - workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op2[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op1[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op0[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq0[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq1[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq2[0], - _mm_packus_epi16(workp_shft, workp_shft)); - } - } - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i ps1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - filter1 = _mm_unpacklo_epi8(zero, filter1); - filter1 = _mm_srai_epi16(filter1, 11); - filter1 = _mm_packs_epi16(filter1, filter1); - - // Filter2 >> 3 - filter2 = _mm_unpacklo_epi8(zero, filter2); - filter2 = _mm_srai_epi16(filter2, 11); - filter2 = _mm_packs_epi16(filter2, zero); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - filt = _mm_unpacklo_epi8(zero, filt); - filt = _mm_srai_epi16(filt, 9); - filt = _mm_packs_epi16(filt, zero); - - filt = _mm_andnot_si128(hev, filt); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - q0 = _mm_loadl_epi64((__m128i *)flat_oq0); - work_a = _mm_andnot_si128(flat, work_a); - q0 = _mm_and_si128(flat, q0); - q0 = _mm_or_si128(work_a, q0); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - q1 = _mm_loadl_epi64((__m128i *)flat_oq1); - work_a = _mm_andnot_si128(flat, work_a); - q1 = _mm_and_si128(flat, q1); - q1 = _mm_or_si128(work_a, q1); - - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q2 = _mm_loadl_epi64((__m128i *)flat_oq2); - work_a = _mm_andnot_si128(flat, work_a); - q2 = _mm_and_si128(flat, q2); - q2 = _mm_or_si128(work_a, q2); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - p0 = _mm_loadl_epi64((__m128i *)flat_op0); - work_a = _mm_andnot_si128(flat, work_a); - p0 = _mm_and_si128(flat, p0); - p0 = _mm_or_si128(work_a, p0); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - p1 = _mm_loadl_epi64((__m128i *)flat_op1); - work_a = _mm_andnot_si128(flat, work_a); - p1 = _mm_and_si128(flat, p1); - p1 = _mm_or_si128(work_a, p1); - - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p2 = _mm_loadl_epi64((__m128i *)flat_op2); - work_a = _mm_andnot_si128(flat, work_a); - p2 = _mm_and_si128(flat, p2); - p2 = _mm_or_si128(work_a, p2); - - _mm_storel_epi64((__m128i *)(s - 3 * p), p2); - _mm_storel_epi64((__m128i *)(s - 2 * p), p1); - _mm_storel_epi64((__m128i *)(s - 1 * p), p0); - _mm_storel_epi64((__m128i *)(s + 0 * p), q0); - _mm_storel_epi64((__m128i *)(s + 1 * p), q1); - _mm_storel_epi64((__m128i *)(s + 2 * p), q2); - } -} - -void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, - const uint8_t *_blimit0, - const uint8_t *_limit0, - const uint8_t *_thresh0, - const uint8_t *_blimit1, - const uint8_t *_limit1, - const uint8_t *_thresh1) { - DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); - const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), - _mm_load_si128((const __m128i *)_blimit1)); - const __m128i limit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), - _mm_load_si128((const __m128i *)_limit1)); - const __m128i thresh = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), - _mm_load_si128((const __m128i *)_thresh1)); - - __m128i mask, hev, flat; - __m128i p3, p2, p1, p0, q0, q1, q2, q3; - - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - { - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), - _mm_subs_epu8(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), - _mm_subs_epu8(q0, q1)); - const __m128i one = _mm_set1_epi8(1); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), - _mm_subs_epu8(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), - _mm_subs_epu8(q1, p1)); - __m128i work; - - // filter_mask and hev_mask - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), - _mm_subs_epu8(p1, p2)), - _mm_or_si128(_mm_subs_epu8(p3, p2), - _mm_subs_epu8(p2, p3))); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), - _mm_subs_epu8(q1, q2)), - _mm_or_si128(_mm_subs_epu8(q3, q2), - _mm_subs_epu8(q2, q3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - - // flat_mask4 - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0), - _mm_subs_epu8(p0, p2)), - _mm_or_si128(_mm_subs_epu8(q2, q0), - _mm_subs_epu8(q0, q2))); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0), - _mm_subs_epu8(p0, p3)), - _mm_or_si128(_mm_subs_epu8(q3, q0), - _mm_subs_epu8(q0, q3))); - flat = _mm_max_epu8(work, flat); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - } - { - const __m128i four = _mm_set1_epi16(4); - unsigned char *src = s; - int i = 0; - - do { - __m128i workp_a, workp_b, workp_shft; - p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); - p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); - p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); - p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); - q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); - q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); - q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); - q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); - - workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); - workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); - workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op2[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op1[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op0[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq0[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq1[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq2[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - src += 8; - } while (++i < 2); - } - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - - const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - - filt = _mm_andnot_si128(hev, filt); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - q0 = _mm_load_si128((__m128i *)flat_oq0); - work_a = _mm_andnot_si128(flat, work_a); - q0 = _mm_and_si128(flat, q0); - q0 = _mm_or_si128(work_a, q0); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - q1 = _mm_load_si128((__m128i *)flat_oq1); - work_a = _mm_andnot_si128(flat, work_a); - q1 = _mm_and_si128(flat, q1); - q1 = _mm_or_si128(work_a, q1); - - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q2 = _mm_load_si128((__m128i *)flat_oq2); - work_a = _mm_andnot_si128(flat, work_a); - q2 = _mm_and_si128(flat, q2); - q2 = _mm_or_si128(work_a, q2); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - p0 = _mm_load_si128((__m128i *)flat_op0); - work_a = _mm_andnot_si128(flat, work_a); - p0 = _mm_and_si128(flat, p0); - p0 = _mm_or_si128(work_a, p0); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - p1 = _mm_load_si128((__m128i *)flat_op1); - work_a = _mm_andnot_si128(flat, work_a); - p1 = _mm_and_si128(flat, p1); - p1 = _mm_or_si128(work_a, p1); - - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p2 = _mm_load_si128((__m128i *)flat_op2); - work_a = _mm_andnot_si128(flat, work_a); - p2 = _mm_and_si128(flat, p2); - p2 = _mm_or_si128(work_a, p2); - - _mm_storeu_si128((__m128i *)(s - 3 * p), p2); - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); - _mm_storeu_si128((__m128i *)(s + 2 * p), q2); - } -} - -void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, - const unsigned char *_blimit0, - const unsigned char *_limit0, - const unsigned char *_thresh0, - const unsigned char *_blimit1, - const unsigned char *_limit1, - const unsigned char *_thresh1) { - const __m128i blimit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), - _mm_load_si128((const __m128i *)_blimit1)); - const __m128i limit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), - _mm_load_si128((const __m128i *)_limit1)); - const __m128i thresh = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), - _mm_load_si128((const __m128i *)_thresh1)); - const __m128i zero = _mm_set1_epi16(0); - __m128i p3, p2, p1, p0, q0, q1, q2, q3; - __m128i mask, hev, flat; - - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - - // filter_mask and hev_mask - { - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), - _mm_subs_epu8(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), - _mm_subs_epu8(q0, q1)); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), - _mm_subs_epu8(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), - _mm_subs_epu8(q1, p1)); - __m128i work; - - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), - _mm_subs_epu8(p1, p2)), - _mm_or_si128(_mm_subs_epu8(p3, p2), - _mm_subs_epu8(p2, p3))); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), - _mm_subs_epu8(q1, q2)), - _mm_or_si128(_mm_subs_epu8(q3, q2), - _mm_subs_epu8(q2, q3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // filter4 - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - - const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - - filt = _mm_andnot_si128(hev, filt); - - q0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - q1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - p0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - p1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); - } -} - -static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, - int in_p, unsigned char *out, int out_p) { - __m128i x0, x1, x2, x3, x4, x5, x6, x7; - __m128i x8, x9, x10, x11, x12, x13, x14, x15; - - // 2-way interleave w/hoisting of unpacks - x0 = _mm_loadl_epi64((__m128i *)in0); // 1 - x1 = _mm_loadl_epi64((__m128i *)(in0 + in_p)); // 3 - x0 = _mm_unpacklo_epi8(x0, x1); // 1 - - x2 = _mm_loadl_epi64((__m128i *)(in0 + 2 * in_p)); // 5 - x3 = _mm_loadl_epi64((__m128i *)(in0 + 3*in_p)); // 7 - x1 = _mm_unpacklo_epi8(x2, x3); // 2 - - x4 = _mm_loadl_epi64((__m128i *)(in0 + 4*in_p)); // 9 - x5 = _mm_loadl_epi64((__m128i *)(in0 + 5*in_p)); // 11 - x2 = _mm_unpacklo_epi8(x4, x5); // 3 - - x6 = _mm_loadl_epi64((__m128i *)(in0 + 6*in_p)); // 13 - x7 = _mm_loadl_epi64((__m128i *)(in0 + 7*in_p)); // 15 - x3 = _mm_unpacklo_epi8(x6, x7); // 4 - x4 = _mm_unpacklo_epi16(x0, x1); // 9 - - x8 = _mm_loadl_epi64((__m128i *)in1); // 2 - x9 = _mm_loadl_epi64((__m128i *)(in1 + in_p)); // 4 - x8 = _mm_unpacklo_epi8(x8, x9); // 5 - x5 = _mm_unpacklo_epi16(x2, x3); // 10 - - x10 = _mm_loadl_epi64((__m128i *)(in1 + 2 * in_p)); // 6 - x11 = _mm_loadl_epi64((__m128i *)(in1 + 3*in_p)); // 8 - x9 = _mm_unpacklo_epi8(x10, x11); // 6 - - x12 = _mm_loadl_epi64((__m128i *)(in1 + 4*in_p)); // 10 - x13 = _mm_loadl_epi64((__m128i *)(in1 + 5*in_p)); // 12 - x10 = _mm_unpacklo_epi8(x12, x13); // 7 - x12 = _mm_unpacklo_epi16(x8, x9); // 11 - - x14 = _mm_loadl_epi64((__m128i *)(in1 + 6*in_p)); // 14 - x15 = _mm_loadl_epi64((__m128i *)(in1 + 7*in_p)); // 16 - x11 = _mm_unpacklo_epi8(x14, x15); // 8 - x13 = _mm_unpacklo_epi16(x10, x11); // 12 - - x6 = _mm_unpacklo_epi32(x4, x5); // 13 - x7 = _mm_unpackhi_epi32(x4, x5); // 14 - x14 = _mm_unpacklo_epi32(x12, x13); // 15 - x15 = _mm_unpackhi_epi32(x12, x13); // 16 - - // Store first 4-line result - _mm_storeu_si128((__m128i *)out, _mm_unpacklo_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + out_p), _mm_unpackhi_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + 2 * out_p), _mm_unpacklo_epi64(x7, x15)); - _mm_storeu_si128((__m128i *)(out + 3 * out_p), _mm_unpackhi_epi64(x7, x15)); - - x4 = _mm_unpackhi_epi16(x0, x1); - x5 = _mm_unpackhi_epi16(x2, x3); - x12 = _mm_unpackhi_epi16(x8, x9); - x13 = _mm_unpackhi_epi16(x10, x11); - - x6 = _mm_unpacklo_epi32(x4, x5); - x7 = _mm_unpackhi_epi32(x4, x5); - x14 = _mm_unpacklo_epi32(x12, x13); - x15 = _mm_unpackhi_epi32(x12, x13); - - // Store second 4-line result - _mm_storeu_si128((__m128i *)(out + 4 * out_p), _mm_unpacklo_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + 5 * out_p), _mm_unpackhi_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + 6 * out_p), _mm_unpacklo_epi64(x7, x15)); - _mm_storeu_si128((__m128i *)(out + 7 * out_p), _mm_unpackhi_epi64(x7, x15)); -} - -static INLINE void transpose(unsigned char *src[], int in_p, - unsigned char *dst[], int out_p, - int num_8x8_to_transpose) { - int idx8x8 = 0; - __m128i x0, x1, x2, x3, x4, x5, x6, x7; - do { - unsigned char *in = src[idx8x8]; - unsigned char *out = dst[idx8x8]; - - x0 = _mm_loadl_epi64((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07 - x1 = _mm_loadl_epi64((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17 - // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 - x0 = _mm_unpacklo_epi8(x0, x1); - - x2 = _mm_loadl_epi64((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27 - x3 = _mm_loadl_epi64((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37 - // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 - x1 = _mm_unpacklo_epi8(x2, x3); - - x4 = _mm_loadl_epi64((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47 - x5 = _mm_loadl_epi64((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57 - // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 - x2 = _mm_unpacklo_epi8(x4, x5); - - x6 = _mm_loadl_epi64((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67 - x7 = _mm_loadl_epi64((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77 - // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 - x3 = _mm_unpacklo_epi8(x6, x7); - - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - x4 = _mm_unpacklo_epi16(x0, x1); - // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 - x5 = _mm_unpacklo_epi16(x2, x3); - // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 - x6 = _mm_unpacklo_epi32(x4, x5); - _mm_storel_pd((double *)(out + 0*out_p), - _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70 - _mm_storeh_pd((double *)(out + 1*out_p), - _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71 - // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 - x7 = _mm_unpackhi_epi32(x4, x5); - _mm_storel_pd((double *)(out + 2*out_p), - _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72 - _mm_storeh_pd((double *)(out + 3*out_p), - _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73 - - // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 - x4 = _mm_unpackhi_epi16(x0, x1); - // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 - x5 = _mm_unpackhi_epi16(x2, x3); - // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 - x6 = _mm_unpacklo_epi32(x4, x5); - _mm_storel_pd((double *)(out + 4*out_p), - _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74 - _mm_storeh_pd((double *)(out + 5*out_p), - _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75 - // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 - x7 = _mm_unpackhi_epi32(x4, x5); - - _mm_storel_pd((double *)(out + 6*out_p), - _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76 - _mm_storeh_pd((double *)(out + 7*out_p), - _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77 - } while (++idx8x8 < num_8x8_to_transpose); -} - -void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); - unsigned char *src[2]; - unsigned char *dst[2]; - - // Transpose 8x16 - transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - - // Loop filtering - vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, - blimit1, limit1, thresh1); - src[0] = t_dst; - src[1] = t_dst + 8; - dst[0] = s - 4; - dst[1] = s - 4 + p * 8; - - // Transpose back - transpose(src, 16, dst, p, 2); -} - -void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh) { - DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); - unsigned char *src[1]; - unsigned char *dst[1]; - - // Transpose 8x8 - src[0] = s - 4; - dst[0] = t_dst; - - transpose(src, p, dst, 8, 1); - - // Loop filtering - vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); - - src[0] = t_dst; - dst[0] = s - 4; - - // Transpose back - transpose(src, 8, dst, p, 1); -} - -void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); - unsigned char *src[2]; - unsigned char *dst[2]; - - // Transpose 8x16 - transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - - // Loop filtering - vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, - blimit1, limit1, thresh1); - src[0] = t_dst; - src[1] = t_dst + 8; - - dst[0] = s - 4; - dst[1] = s - 4 + p * 8; - - // Transpose back - transpose(src, 16, dst, p, 2); -} - -void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh) { - DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 16]); - unsigned char *src[2]; - unsigned char *dst[2]; - - src[0] = s - 8; - src[1] = s; - dst[0] = t_dst; - dst[1] = t_dst + 8 * 8; - - // Transpose 16x8 - transpose(src, p, dst, 8, 2); - - // Loop filtering - vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); - - src[0] = t_dst; - src[1] = t_dst + 8 * 8; - dst[0] = s - 8; - dst[1] = s; - - // Transpose back - transpose(src, 8, dst, p, 2); -} - -void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh) { - DECLARE_ALIGNED(16, unsigned char, t_dst[256]); - - // Transpose 16x16 - transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); - transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); - - // Loop filtering - vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); - - // Transpose back - transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); - transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); -} diff --git a/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h b/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h deleted file mode 100644 index 536b206876..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_ -#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_ - -#include -#include "vpx/vpx_integer.h" - -#define pair_set_epi16(a, b) \ - _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ - (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) - -#define dual_set_epi16(a, b) \ - _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ - (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) - -#define octa_set_epi16(a, b, c, d, e, f, g, h) \ - _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ - (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) - -#endif // VPX_DSP_X86_TXFM_COMMON_SSE2_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c deleted file mode 100644 index 422b0fc422..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/convolve.h" - -#if HAVE_SSE2 -filter8_1dfunction vpx_filter_block1d16_v8_sse2; -filter8_1dfunction vpx_filter_block1d16_h8_sse2; -filter8_1dfunction vpx_filter_block1d8_v8_sse2; -filter8_1dfunction vpx_filter_block1d8_h8_sse2; -filter8_1dfunction vpx_filter_block1d4_v8_sse2; -filter8_1dfunction vpx_filter_block1d4_h8_sse2; -filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2; - -filter8_1dfunction vpx_filter_block1d16_v2_sse2; -filter8_1dfunction vpx_filter_block1d16_h2_sse2; -filter8_1dfunction vpx_filter_block1d8_v2_sse2; -filter8_1dfunction vpx_filter_block1d8_h2_sse2; -filter8_1dfunction vpx_filter_block1d4_v2_sse2; -filter8_1dfunction vpx_filter_block1d4_h2_sse2; -filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2; - -// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); -FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); -FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); - -// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, sse2); -FUN_CONV_2D(avg_ , sse2); - -#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; - -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; - -// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); -HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); -HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); -HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, - sse2); - -// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h, int bd); -HIGH_FUN_CONV_2D(, sse2); -HIGH_FUN_CONV_2D(avg_ , sse2); -#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -#endif // HAVE_SSE2 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm deleted file mode 100644 index abc0270655..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm +++ /dev/null @@ -1,228 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro convolve_fn 1-2 -%ifidn %1, avg -%define AUX_XMM_REGS 4 -%else -%define AUX_XMM_REGS 0 -%endif -%ifidn %2, highbd -%define pavg pavgw -cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ - dst, dst_stride, \ - fx, fxs, fy, fys, w, h, bd -%else -%define pavg pavgb -cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ - dst, dst_stride, \ - fx, fxs, fy, fys, w, h -%endif - mov r4d, dword wm -%ifidn %2, highbd - shl r4d, 1 - shl srcq, 1 - shl src_strideq, 1 - shl dstq, 1 - shl dst_strideq, 1 -%else - cmp r4d, 4 - je .w4 -%endif - cmp r4d, 8 - je .w8 - cmp r4d, 16 - je .w16 - cmp r4d, 32 - je .w32 -%ifidn %2, highbd - cmp r4d, 64 - je .w64 - - mov r4d, dword hm -.loop128: - movu m0, [srcq] - movu m1, [srcq+16] - movu m2, [srcq+32] - movu m3, [srcq+48] -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+16] - pavg m2, [dstq+32] - pavg m3, [dstq+48] -%endif - mova [dstq ], m0 - mova [dstq+16], m1 - mova [dstq+32], m2 - mova [dstq+48], m3 - movu m0, [srcq+64] - movu m1, [srcq+80] - movu m2, [srcq+96] - movu m3, [srcq+112] - add srcq, src_strideq -%ifidn %1, avg - pavg m0, [dstq+64] - pavg m1, [dstq+80] - pavg m2, [dstq+96] - pavg m3, [dstq+112] -%endif - mova [dstq+64], m0 - mova [dstq+80], m1 - mova [dstq+96], m2 - mova [dstq+112], m3 - add dstq, dst_strideq - dec r4d - jnz .loop128 - RET -%endif - -.w64 - mov r4d, dword hm -.loop64: - movu m0, [srcq] - movu m1, [srcq+16] - movu m2, [srcq+32] - movu m3, [srcq+48] - add srcq, src_strideq -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+16] - pavg m2, [dstq+32] - pavg m3, [dstq+48] -%endif - mova [dstq ], m0 - mova [dstq+16], m1 - mova [dstq+32], m2 - mova [dstq+48], m3 - add dstq, dst_strideq - dec r4d - jnz .loop64 - RET - -.w32: - mov r4d, dword hm -.loop32: - movu m0, [srcq] - movu m1, [srcq+16] - movu m2, [srcq+src_strideq] - movu m3, [srcq+src_strideq+16] - lea srcq, [srcq+src_strideq*2] -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq +16] - pavg m2, [dstq+dst_strideq] - pavg m3, [dstq+dst_strideq+16] -%endif - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq+dst_strideq ], m2 - mova [dstq+dst_strideq+16], m3 - lea dstq, [dstq+dst_strideq*2] - sub r4d, 2 - jnz .loop32 - RET - -.w16: - mov r4d, dword hm - lea r5q, [src_strideq*3] - lea r6q, [dst_strideq*3] -.loop16: - movu m0, [srcq] - movu m1, [srcq+src_strideq] - movu m2, [srcq+src_strideq*2] - movu m3, [srcq+r5q] - lea srcq, [srcq+src_strideq*4] -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+dst_strideq] - pavg m2, [dstq+dst_strideq*2] - pavg m3, [dstq+r6q] -%endif - mova [dstq ], m0 - mova [dstq+dst_strideq ], m1 - mova [dstq+dst_strideq*2], m2 - mova [dstq+r6q ], m3 - lea dstq, [dstq+dst_strideq*4] - sub r4d, 4 - jnz .loop16 - RET - -.w8: - mov r4d, dword hm - lea r5q, [src_strideq*3] - lea r6q, [dst_strideq*3] -.loop8: - movh m0, [srcq] - movh m1, [srcq+src_strideq] - movh m2, [srcq+src_strideq*2] - movh m3, [srcq+r5q] - lea srcq, [srcq+src_strideq*4] -%ifidn %1, avg - movh m4, [dstq] - movh m5, [dstq+dst_strideq] - movh m6, [dstq+dst_strideq*2] - movh m7, [dstq+r6q] - pavg m0, m4 - pavg m1, m5 - pavg m2, m6 - pavg m3, m7 -%endif - movh [dstq ], m0 - movh [dstq+dst_strideq ], m1 - movh [dstq+dst_strideq*2], m2 - movh [dstq+r6q ], m3 - lea dstq, [dstq+dst_strideq*4] - sub r4d, 4 - jnz .loop8 - RET - -%ifnidn %2, highbd -.w4: - mov r4d, dword hm - lea r5q, [src_strideq*3] - lea r6q, [dst_strideq*3] -.loop4: - movd m0, [srcq] - movd m1, [srcq+src_strideq] - movd m2, [srcq+src_strideq*2] - movd m3, [srcq+r5q] - lea srcq, [srcq+src_strideq*4] -%ifidn %1, avg - movd m4, [dstq] - movd m5, [dstq+dst_strideq] - movd m6, [dstq+dst_strideq*2] - movd m7, [dstq+r6q] - pavg m0, m4 - pavg m1, m5 - pavg m2, m6 - pavg m3, m7 -%endif - movd [dstq ], m0 - movd [dstq+dst_strideq ], m1 - movd [dstq+dst_strideq*2], m2 - movd [dstq+r6q ], m3 - lea dstq, [dstq+dst_strideq*4] - sub r4d, 4 - jnz .loop4 - RET -%endif -%endmacro - -INIT_XMM sse2 -convolve_fn copy -convolve_fn avg -%if CONFIG_VP9_HIGHBITDEPTH -convolve_fn copy, highbd -convolve_fn avg, highbd -%endif diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c deleted file mode 100644 index d8a92354c9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Due to a header conflict between math.h and intrinsics includes with ceil() -// in certain configurations under vs9 this include needs to precede -// immintrin.h. - -#include - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/convolve.h" -#include "vpx_ports/mem.h" - -// filters for 16_h8 and 16_v8 -DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 -}; - -#if defined(__clang__) -// -- GODOT start - -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ - (!defined(__MACPORTS__) && defined(__APPLE__) && \ - ((__clang_major__ == 4 && __clang_minor__ <= 2) || \ - (__clang_major__ == 5 && __clang_minor__ == 0))) -// -- GODOT end -- -# define MM256_BROADCASTSI128_SI256(x) \ - _mm_broadcastsi128_si256((__m128i const *)&(x)) -# else // clang > 3.3, and not 5.0 on macosx. -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -# endif // clang <= 3.3 -#elif defined(__GNUC__) -# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) -# define MM256_BROADCASTSI128_SI256(x) \ - _mm_broadcastsi128_si256((__m128i const *)&(x)) -# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 -# define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) -# else // gcc > 4.7 -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -# endif // gcc <= 4.6 -#else // !(gcc || clang) -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -#endif // __clang__ - -static void vpx_filter_block1d16_h8_avx2(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i filtersReg; - __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m256i firstFilters, secondFilters, thirdFilters, forthFilters; - __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3; - __m256i srcReg32b1, srcReg32b2, filtersReg32; - unsigned int i; - ptrdiff_t src_stride, dst_stride; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - // have the same data in both lanes of a 256 bit register - filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 256 bit register - firstFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 256 bit register - secondFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 256 bit register - thirdFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 256 bit register - forthFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x706u)); - - filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2); - filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2); - filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2); - filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2); - - // multiple the size of the source and destination stride by two - src_stride = src_pixels_per_line << 1; - dst_stride = output_pitch << 1; - for (i = output_height; i > 1; i-=2) { - // load the 2 strides of source - srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr - 3))); - srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm_loadu_si128((const __m128i *) - (src_ptr+src_pixels_per_line-3)), 1); - - // filter the source buffer - srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2); - - // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, - _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - // reading 2 strides of the next 16 bytes - // (part of it was being read by earlier read) - srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + 5))); - srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm_loadu_si128((const __m128i *) - (src_ptr+src_pixels_per_line+5)), 1); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, - _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - // filter the source buffer - srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg); - srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); - - // add and saturate the results together - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2); - - // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); - - // add and saturate the results together - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, - _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, - _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64); - - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7); - srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, - srcRegFilt32b2_1); - - src_ptr+=src_stride; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, - _mm256_castsi256_si128(srcRegFilt32b1_1)); - - // save the next 16 bits - _mm_store_si128((__m128i*)(output_ptr+output_pitch), - _mm256_extractf128_si256(srcRegFilt32b1_1, 1)); - output_ptr+=dst_stride; - } - - // if the number of strides is odd. - // process only 16 bytes - if (i > 0) { - __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1; - __m128i srcRegFilt2, srcRegFilt3; - - srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt1Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt4Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, - _mm256_castsi256_si128(firstFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt2Reg)); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt3Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(secondFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - - // reading the next 16 bytes - // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - // filter the source buffer - srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt1Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt4Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, - _mm256_castsi256_si128(firstFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt2Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt3Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(secondFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm256_castsi256_si128(addFilterReg64)); - - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm256_castsi256_si128(addFilterReg64)); - - // shift by 7 bit each 16 bit - srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); - srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); - } -} - -static void vpx_filter_block1d16_v8_avx2(const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i filtersReg; - __m256i addFilterReg64; - __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; - __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10; - __m256i srcReg32b11, srcReg32b12, filtersReg32; - __m256i firstFilters, secondFilters, thirdFilters, forthFilters; - unsigned int i; - ptrdiff_t src_stride, dst_stride; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the - // same data in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - // have the same data in both lanes of a 256 bit register - filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 256 bit register - firstFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 256 bit register - secondFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 256 bit register - thirdFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 256 bit register - forthFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x706u)); - - // multiple the size of the source and destination stride by two - src_stride = src_pitch << 1; - dst_stride = out_pitch << 1; - - // load 16 bytes 7 times in stride of src_pitch - srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr))); - srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch))); - srcReg32b3 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2))); - srcReg32b4 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3))); - srcReg32b5 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4))); - srcReg32b6 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5))); - srcReg32b7 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6))); - - // have each consecutive loads on the same 256 register - srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm256_castsi256_si128(srcReg32b2), 1); - srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm256_castsi256_si128(srcReg32b3), 1); - srcReg32b3 = _mm256_inserti128_si256(srcReg32b3, - _mm256_castsi256_si128(srcReg32b4), 1); - srcReg32b4 = _mm256_inserti128_si256(srcReg32b4, - _mm256_castsi256_si128(srcReg32b5), 1); - srcReg32b5 = _mm256_inserti128_si256(srcReg32b5, - _mm256_castsi256_si128(srcReg32b6), 1); - srcReg32b6 = _mm256_inserti128_si256(srcReg32b6, - _mm256_castsi256_si128(srcReg32b7), 1); - - // merge every two consecutive registers except the last one - srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2); - srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2); - - // save - srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4); - - // save - srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4); - - // save - srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6); - - // save - srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6); - - - for (i = output_height; i > 1; i-=2) { - // load the last 2 loads of 16 bytes and have every two - // consecutive loads in the same 256 bit register - srcReg32b8 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7))); - srcReg32b7 = _mm256_inserti128_si256(srcReg32b7, - _mm256_castsi256_si128(srcReg32b8), 1); - srcReg32b9 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8))); - srcReg32b8 = _mm256_inserti128_si256(srcReg32b8, - _mm256_castsi256_si128(srcReg32b9), 1); - - // merge every two consecutive registers - // save - srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8); - srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters); - - // add and saturate the results together - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters); - srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters); - - // add and saturate the results together - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, - _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, - _mm256_max_epi16(srcReg32b8, srcReg32b12)); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters); - - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters); - srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters); - - // add and saturate the results together - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_max_epi16(srcReg32b8, srcReg32b12)); - - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7); - srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1); - - src_ptr+=src_stride; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, - _mm256_castsi256_si128(srcReg32b1)); - - // save the next 16 bits - _mm_store_si128((__m128i*)(output_ptr+out_pitch), - _mm256_extractf128_si256(srcReg32b1, 1)); - - output_ptr+=dst_stride; - - // save part of the registers for next strides - srcReg32b10 = srcReg32b11; - srcReg32b1 = srcReg32b3; - srcReg32b11 = srcReg32b2; - srcReg32b3 = srcReg32b5; - srcReg32b2 = srcReg32b4; - srcReg32b5 = srcReg32b7; - srcReg32b7 = srcReg32b9; - } - if (i > 0) { - __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5; - __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8; - // load the last 16 bytes - srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); - - // merge the last 2 results together - srcRegFilt4 = _mm_unpacklo_epi8( - _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); - srcRegFilt7 = _mm_unpackhi_epi8( - _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10), - _mm256_castsi256_si128(firstFilters)); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, - _mm256_castsi256_si128(forthFilters)); - srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1), - _mm256_castsi256_si128(firstFilters)); - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7); - - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11), - _mm256_castsi256_si128(secondFilters)); - srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3), - _mm256_castsi256_si128(secondFilters)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2), - _mm256_castsi256_si128(thirdFilters)); - srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5), - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt4, srcRegFilt6)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm_min_epi16(srcRegFilt5, srcRegFilt7)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt4, srcRegFilt6)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm_max_epi16(srcRegFilt5, srcRegFilt7)); - - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm256_castsi256_si128(addFilterReg64)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm256_castsi256_si128(addFilterReg64)); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3); - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - } -} - -#if HAVE_AVX2 && HAVE_SSSE3 -filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -#if ARCH_X86_64 -filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; -#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3 -#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3 -#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 -filter8_1dfunction vpx_filter_block1d8_v8_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_ssse3; -#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3 -#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3 -#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3 -#endif // ARCH_X86_64 -filter8_1dfunction vpx_filter_block1d16_v2_ssse3; -filter8_1dfunction vpx_filter_block1d16_h2_ssse3; -filter8_1dfunction vpx_filter_block1d8_v2_ssse3; -filter8_1dfunction vpx_filter_block1d8_h2_ssse3; -filter8_1dfunction vpx_filter_block1d4_v2_ssse3; -filter8_1dfunction vpx_filter_block1d4_h2_ssse3; -#define vpx_filter_block1d4_v8_avx2 vpx_filter_block1d4_v8_ssse3 -#define vpx_filter_block1d16_v2_avx2 vpx_filter_block1d16_v2_ssse3 -#define vpx_filter_block1d16_h2_avx2 vpx_filter_block1d16_h2_ssse3 -#define vpx_filter_block1d8_v2_avx2 vpx_filter_block1d8_v2_ssse3 -#define vpx_filter_block1d8_h2_avx2 vpx_filter_block1d8_h2_ssse3 -#define vpx_filter_block1d4_v2_avx2 vpx_filter_block1d4_v2_ssse3 -#define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3 -// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); - -// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, avx2); -#endif // HAVE_AX2 && HAVE_SSSE3 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c deleted file mode 100644 index 6fd52087c7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ /dev/null @@ -1,915 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Due to a header conflict between math.h and intrinsics includes with ceil() -// in certain configurations under vs9 this include needs to precede -// tmmintrin.h. - -#include - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_filter.h" -#include "vpx_dsp/x86/convolve.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/emmintrin_compat.h" - -// filters only for the 4_h8 convolution -DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -// filters for 8_h8 and 16_h8 -DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = { - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 -}; - -// These are reused by the avx2 intrinsics. -filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; - -void vpx_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i firstFilters, secondFilters, shuffle1, shuffle2; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, srcReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 =_mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter into the first lane - firstFilters = _mm_shufflelo_epi16(filtersReg, 0); - // duplicate only the third 16 bit in the filter into the first lane - secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu); - // duplicate only the seconds 16 bits in the filter into the second lane - // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3 - firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u); - // duplicate only the forth 16 bits in the filter into the second lane - // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7 - secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu); - - // loading the local filters - shuffle1 =_mm_load_si128((__m128i const *)filt1_4_h8); - shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1); - srcRegFilt2= _mm_shuffle_epi8(srcReg, shuffle2); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // extract the higher half of the lane - srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8); - srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8); - - minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2); - - // add and saturate all the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - src_ptr+=src_pixels_per_line; - - // save only 4 bytes - *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vpx_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg); - srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters); - - // add and saturate all the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - - srcRegFilt2= _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pixels_per_line; - - // save only 8 bytes - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i addFilterReg64, filtersReg, minReg; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5; - __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7; - __m128i srcReg8; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - // load the first 7 rows of 8 bytes - srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr); - srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); - srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); - srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); - srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); - srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); - srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); - - for (i = 0; i < output_height; i++) { - // load the last 8 bytes - srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); - - // merge the result together - srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2); - srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4); - - // merge the result together - srcRegFilt2 = _mm_unpacklo_epi8(srcReg5, srcReg6); - srcRegFilt5 = _mm_unpacklo_epi8(srcReg7, srcReg8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters); - - // add and saturate the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5); - srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pitch; - - // shift down a row - srcReg1 = srcReg2; - srcReg2 = srcReg3; - srcReg3 = srcReg4; - srcReg4 = srcReg5; - srcReg5 = srcReg6; - srcReg6 = srcReg7; - srcReg7 = srcReg8; - - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=out_pitch; - } -} - -filter8_1dfunction vpx_filter_block1d16_v8_ssse3; -filter8_1dfunction vpx_filter_block1d16_h8_ssse3; -filter8_1dfunction vpx_filter_block1d8_v8_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_ssse3; -filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_ssse3; -filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3; - -filter8_1dfunction vpx_filter_block1d16_v2_ssse3; -filter8_1dfunction vpx_filter_block1d16_h2_ssse3; -filter8_1dfunction vpx_filter_block1d8_v2_ssse3; -filter8_1dfunction vpx_filter_block1d8_h2_ssse3; -filter8_1dfunction vpx_filter_block1d4_v2_ssse3; -filter8_1dfunction vpx_filter_block1d4_h2_ssse3; -filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; - -// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); -FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); -FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, - ssse3); - -#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) { \ - const __m128i tr0_0 = _mm_unpacklo_epi8(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi8(in2, in3); \ - const __m128i tr0_2 = _mm_unpacklo_epi8(in4, in5); \ - const __m128i tr0_3 = _mm_unpacklo_epi8(in6, in7); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi16(tr0_0, tr0_1); \ - const __m128i tr1_1 = _mm_unpackhi_epi16(tr0_0, tr0_1); \ - const __m128i tr1_2 = _mm_unpacklo_epi16(tr0_2, tr0_3); \ - const __m128i tr1_3 = _mm_unpackhi_epi16(tr0_2, tr0_3); \ - \ - const __m128i tr2_0 = _mm_unpacklo_epi32(tr1_0, tr1_2); \ - const __m128i tr2_1 = _mm_unpackhi_epi32(tr1_0, tr1_2); \ - const __m128i tr2_2 = _mm_unpacklo_epi32(tr1_1, tr1_3); \ - const __m128i tr2_3 = _mm_unpackhi_epi32(tr1_1, tr1_3); \ - \ - out0 = _mm_unpacklo_epi64(tr2_0, tr2_0); \ - out1 = _mm_unpackhi_epi64(tr2_0, tr2_0); \ - out2 = _mm_unpacklo_epi64(tr2_1, tr2_1); \ - out3 = _mm_unpackhi_epi64(tr2_1, tr2_1); \ - out4 = _mm_unpacklo_epi64(tr2_2, tr2_2); \ - out5 = _mm_unpackhi_epi64(tr2_2, tr2_2); \ - out6 = _mm_unpacklo_epi64(tr2_3, tr2_3); \ - out7 = _mm_unpackhi_epi64(tr2_3, tr2_3); \ -} - -static void filter_horiz_w8_ssse3(const uint8_t *src_x, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *x_filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)x_filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_loadl_epi64((const __m128i *)src_x); - const __m128i B = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch)); - const __m128i C = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 2)); - const __m128i D = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 3)); - const __m128i E = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 4)); - const __m128i F = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 5)); - const __m128i G = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 6)); - const __m128i H = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 7)); - // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17 - const __m128i tr0_0 = _mm_unpacklo_epi16(A, B); - // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 - const __m128i tr0_1 = _mm_unpacklo_epi16(C, D); - // 40 41 50 51 42 43 52 53 44 45 54 55 46 47 56 57 - const __m128i tr0_2 = _mm_unpacklo_epi16(E, F); - // 60 61 70 71 62 63 72 73 64 65 74 75 66 67 76 77 - const __m128i tr0_3 = _mm_unpacklo_epi16(G, H); - // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33 - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 - const __m128i tr1_1 = _mm_unpackhi_epi32(tr0_0, tr0_1); - // 40 41 50 51 60 61 70 71 42 43 52 53 62 63 72 73 - const __m128i tr1_2 = _mm_unpacklo_epi32(tr0_2, tr0_3); - // 44 45 54 55 64 65 74 75 46 47 56 57 66 67 76 77 - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); - // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71 - const __m128i s1s0 = _mm_unpacklo_epi64(tr1_0, tr1_2); - const __m128i s3s2 = _mm_unpackhi_epi64(tr1_0, tr1_2); - const __m128i s5s4 = _mm_unpacklo_epi64(tr1_1, tr1_3); - const __m128i s7s6 = _mm_unpackhi_epi64(tr1_1, tr1_3); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)dst, temp); -} - -static void transpose8x8_to_dst(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride) { - __m128i A, B, C, D, E, F, G, H; - - A = _mm_loadl_epi64((const __m128i *)src); - B = _mm_loadl_epi64((const __m128i *)(src + src_stride)); - C = _mm_loadl_epi64((const __m128i *)(src + src_stride * 2)); - D = _mm_loadl_epi64((const __m128i *)(src + src_stride * 3)); - E = _mm_loadl_epi64((const __m128i *)(src + src_stride * 4)); - F = _mm_loadl_epi64((const __m128i *)(src + src_stride * 5)); - G = _mm_loadl_epi64((const __m128i *)(src + src_stride * 6)); - H = _mm_loadl_epi64((const __m128i *)(src + src_stride * 7)); - - TRANSPOSE_8X8(A, B, C, D, E, F, G, H, - A, B, C, D, E, F, G, H); - - _mm_storel_epi64((__m128i*)dst, A); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 1), B); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 2), C); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 3), D); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 4), E); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 5), F); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 6), G); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 7), H); -} - -static void scaledconvolve_horiz_w8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]); - int x, y, z; - src -= SUBPEL_TAPS / 2 - 1; - - // This function processes 8x8 areas. The intermediate height is not always - // a multiple of 8, so force it to be a multiple of 8 here. - y = h + (8 - (h & 0x7)); - - do { - int x_q4 = x0_q4; - for (x = 0; x < w; x += 8) { - // process 8 src_x steps - for (z = 0; z < 8; ++z) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - if (x_q4 & SUBPEL_MASK) { - filter_horiz_w8_ssse3(src_x, src_stride, temp + (z * 8), x_filter); - } else { - int i; - for (i = 0; i < 8; ++i) { - temp[z * 8 + i] = src_x[i * src_stride + 3]; - } - } - x_q4 += x_step_q4; - } - - // transpose the 8x8 filters values back to dst - transpose8x8_to_dst(temp, 8, dst + x, dst_stride); - } - - src += src_stride * 8; - dst += dst_stride * 8; - } while (y -= 8); -} - -static void filter_horiz_w4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_loadl_epi64((const __m128i *)src_ptr); - const __m128i B = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); - const __m128i C = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); - const __m128i D = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); - // TRANSPOSE... - // 00 01 02 03 04 05 06 07 - // 10 11 12 13 14 15 16 17 - // 20 21 22 23 24 25 26 27 - // 30 31 32 33 34 35 36 37 - // - // TO - // - // 00 10 20 30 - // 01 11 21 31 - // 02 12 22 32 - // 03 13 23 33 - // 04 14 24 34 - // 05 15 25 35 - // 06 16 26 36 - // 07 17 27 37 - // - // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17 - const __m128i tr0_0 = _mm_unpacklo_epi16(A, B); - // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 - const __m128i tr0_1 = _mm_unpacklo_epi16(C, D); - // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33 - const __m128i s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 - const __m128i s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1); - // 02 03 12 13 22 23 32 33 - const __m128i s3s2 = _mm_srli_si128(s1s0, 8); - // 06 07 16 17 26 27 36 37 - const __m128i s7s6 = _mm_srli_si128(s5s4, 8); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 4 bytes - *(int *)dst = _mm_cvtsi128_si32(temp); -} - -static void transpose4x4_to_dst(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride) { - __m128i A = _mm_cvtsi32_si128(*(const int *)src); - __m128i B = _mm_cvtsi32_si128(*(const int *)(src + src_stride)); - __m128i C = _mm_cvtsi32_si128(*(const int *)(src + src_stride * 2)); - __m128i D = _mm_cvtsi32_si128(*(const int *)(src + src_stride * 3)); - // 00 10 01 11 02 12 03 13 - const __m128i tr0_0 = _mm_unpacklo_epi8(A, B); - // 20 30 21 31 22 32 23 33 - const __m128i tr0_1 = _mm_unpacklo_epi8(C, D); - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - A = _mm_unpacklo_epi16(tr0_0, tr0_1); - B = _mm_srli_si128(A, 4); - C = _mm_srli_si128(A, 8); - D = _mm_srli_si128(A, 12); - - *(int *)(dst) = _mm_cvtsi128_si32(A); - *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(B); - *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(C); - *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(D); -} - -static void scaledconvolve_horiz_w4(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]); - int x, y, z; - src -= SUBPEL_TAPS / 2 - 1; - - for (y = 0; y < h; y += 4) { - int x_q4 = x0_q4; - for (x = 0; x < w; x += 4) { - // process 4 src_x steps - for (z = 0; z < 4; ++z) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - if (x_q4 & SUBPEL_MASK) { - filter_horiz_w4_ssse3(src_x, src_stride, temp + (z * 4), x_filter); - } else { - int i; - for (i = 0; i < 4; ++i) { - temp[z * 4 + i] = src_x[i * src_stride + 3]; - } - } - x_q4 += x_step_q4; - } - - // transpose the 4x4 filters values back to dst - transpose4x4_to_dst(temp, 4, dst + x, dst_stride); - } - - src += src_stride * 4; - dst += dst_stride * 4; - } -} - -static void filter_vert_w4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_cvtsi32_si128(*(const int *)src_ptr); - const __m128i B = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch)); - const __m128i C = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 2)); - const __m128i D = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 3)); - const __m128i E = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 4)); - const __m128i F = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 5)); - const __m128i G = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 6)); - const __m128i H = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 7)); - const __m128i s1s0 = _mm_unpacklo_epi8(A, B); - const __m128i s3s2 = _mm_unpacklo_epi8(C, D); - const __m128i s5s4 = _mm_unpacklo_epi8(E, F); - const __m128i s7s6 = _mm_unpacklo_epi8(G, H); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 4 bytes - *(int *)dst = _mm_cvtsi128_si32(temp); -} - -static void scaledconvolve_vert_w4(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int y; - int y_q4 = y0_q4; - - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - - if (y_q4 & SUBPEL_MASK) { - filter_vert_w4_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); - } else { - memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); - } - - y_q4 += y_step_q4; - } -} - -static void filter_vert_w8_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_loadl_epi64((const __m128i *)src_ptr); - const __m128i B = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); - const __m128i C = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); - const __m128i D = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); - const __m128i E = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); - const __m128i F = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); - const __m128i G = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); - const __m128i H = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); - const __m128i s1s0 = _mm_unpacklo_epi8(A, B); - const __m128i s3s2 = _mm_unpacklo_epi8(C, D); - const __m128i s5s4 = _mm_unpacklo_epi8(E, F); - const __m128i s7s6 = _mm_unpacklo_epi8(G, H); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)dst, temp); -} - -static void scaledconvolve_vert_w8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int y; - int y_q4 = y0_q4; - - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - if (y_q4 & SUBPEL_MASK) { - filter_vert_w8_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); - } else { - memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); - } - y_q4 += y_step_q4; - } -} - -static void filter_vert_w16_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter, int w) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - int i; - - for (i = 0; i < w; i += 16) { - const __m128i A = _mm_loadu_si128((const __m128i *)src_ptr); - const __m128i B = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); - const __m128i C = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); - const __m128i D = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); - const __m128i E = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); - const __m128i F = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); - const __m128i G = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); - const __m128i H = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); - // merge the result together - const __m128i s1s0_lo = _mm_unpacklo_epi8(A, B); - const __m128i s7s6_lo = _mm_unpacklo_epi8(G, H); - const __m128i s1s0_hi = _mm_unpackhi_epi8(A, B); - const __m128i s7s6_hi = _mm_unpackhi_epi8(G, H); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0_lo = _mm_maddubs_epi16(s1s0_lo, f1f0); - const __m128i x3_lo = _mm_maddubs_epi16(s7s6_lo, f7f6); - const __m128i x0_hi = _mm_maddubs_epi16(s1s0_hi, f1f0); - const __m128i x3_hi = _mm_maddubs_epi16(s7s6_hi, f7f6); - // add and saturate the results together - const __m128i x3x0_lo = _mm_adds_epi16(x0_lo, x3_lo); - const __m128i x3x0_hi = _mm_adds_epi16(x0_hi, x3_hi); - // merge the result together - const __m128i s3s2_lo = _mm_unpacklo_epi8(C, D); - const __m128i s3s2_hi = _mm_unpackhi_epi8(C, D); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x1_lo = _mm_maddubs_epi16(s3s2_lo, f3f2); - const __m128i x1_hi = _mm_maddubs_epi16(s3s2_hi, f3f2); - // merge the result together - const __m128i s5s4_lo = _mm_unpacklo_epi8(E, F); - const __m128i s5s4_hi = _mm_unpackhi_epi8(E, F); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x2_lo = _mm_maddubs_epi16(s5s4_lo, f5f4); - const __m128i x2_hi = _mm_maddubs_epi16(s5s4_hi, f5f4); - // add and saturate the results together - __m128i temp_lo = _mm_adds_epi16(x3x0_lo, _mm_min_epi16(x1_lo, x2_lo)); - __m128i temp_hi = _mm_adds_epi16(x3x0_hi, _mm_min_epi16(x1_hi, x2_hi)); - - // add and saturate the results together - temp_lo = _mm_adds_epi16(temp_lo, _mm_max_epi16(x1_lo, x2_lo)); - temp_hi = _mm_adds_epi16(temp_hi, _mm_max_epi16(x1_hi, x2_hi)); - // round and shift by 7 bit each 16 bit - temp_lo = _mm_mulhrs_epi16(temp_lo, k_256); - temp_hi = _mm_mulhrs_epi16(temp_hi, k_256); - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - temp_hi = _mm_packus_epi16(temp_lo, temp_hi); - src_ptr += 16; - // save 16 bytes convolve result - _mm_store_si128((__m128i*)&dst[i], temp_hi); - } -} - -static void scaledconvolve_vert_w16(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int y; - int y_q4 = y0_q4; - - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - if (y_q4 & SUBPEL_MASK) { - filter_vert_w16_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter, - w); - } else { - memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); - } - y_q4 += y_step_q4; - } -} - -static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *const x_filters, - int x0_q4, int x_step_q4, - const InterpKernel *const y_filters, - int y0_q4, int y_step_q4, - int w, int h) { - // Note: Fixed size intermediate buffer, temp, places limits on parameters. - // 2d filtering proceeds in 2 steps: - // (1) Interpolate horizontally into an intermediate buffer, temp. - // (2) Interpolate temp vertically to derive the sub-pixel result. - // Deriving the maximum number of rows in the temp buffer (135): - // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). - // --Largest block size is 64x64 pixels. - // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the - // original frame (in 1/16th pixel units). - // --Must round-up because block may be located at sub-pixel position. - // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. - // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. - // --Require an additional 8 rows for the horiz_w8 transpose tail. - DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]); - const int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= 64); - assert(h <= 64); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - if (w >= 8) { - scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, temp, 64, x_filters, x0_q4, x_step_q4, - w, intermediate_height); - } else { - scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, temp, 64, x_filters, x0_q4, x_step_q4, - w, intermediate_height); - } - - if (w >= 16) { - scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, - dst_stride, y_filters, y0_q4, y_step_q4, w, h); - } else if (w == 8) { - scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, - dst_stride, y_filters, y0_q4, y_step_q4, w, h); - } else { - scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, - dst_stride, y_filters, y0_q4, y_step_q4, w, h); - } -} - -static const InterpKernel *get_filter_base(const int16_t *filter) { - // NOTE: This assumes that the filter table is 256-byte aligned. - // TODO(agrange) Modify to make independent of table alignment. - return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); -} - -static int get_filter_offset(const int16_t *f, const InterpKernel *base) { - return (int)((const InterpKernel *)(intptr_t)f - base); -} - -void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - scaledconvolve2d(src, src_stride, dst, dst_stride, - filters_x, x0_q4, x_step_q4, - filters_y, y0_q4, y_step_q4, w, h); -} - -// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, ssse3); -FUN_CONV_2D(avg_ , ssse3); diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm deleted file mode 100644 index 08f3d6a6cf..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm +++ /dev/null @@ -1,987 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;Note: tap3 and tap4 have to be applied and added after other taps to avoid -;overflow. - -%macro GET_FILTERS_4 0 - mov rdx, arg(5) ;filter ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - pshuflw xmm0, xmm7, 0b ;k0 - pshuflw xmm1, xmm7, 01010101b ;k1 - pshuflw xmm2, xmm7, 10101010b ;k2 - pshuflw xmm3, xmm7, 11111111b ;k3 - psrldq xmm7, 8 - pshuflw xmm4, xmm7, 0b ;k4 - pshuflw xmm5, xmm7, 01010101b ;k5 - pshuflw xmm6, xmm7, 10101010b ;k6 - pshuflw xmm7, xmm7, 11111111b ;k7 - - punpcklqdq xmm0, xmm1 - punpcklqdq xmm2, xmm3 - punpcklqdq xmm5, xmm4 - punpcklqdq xmm6, xmm7 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm2 - movdqa k5k4, xmm5 - movdqa k6k7, xmm6 - - movq xmm6, rcx - pshufd xmm6, xmm6, 0 - movdqa krd, xmm6 - - pxor xmm7, xmm7 - movdqa zero, xmm7 -%endm - -%macro APPLY_FILTER_4 1 - punpckldq xmm0, xmm1 ;two row in one register - punpckldq xmm6, xmm7 - punpckldq xmm2, xmm3 - punpckldq xmm5, xmm4 - - punpcklbw xmm0, zero ;unpack to word - punpcklbw xmm6, zero - punpcklbw xmm2, zero - punpcklbw xmm5, zero - - pmullw xmm0, k0k1 ;multiply the filter factors - pmullw xmm6, k6k7 - pmullw xmm2, k2k3 - pmullw xmm5, k5k4 - - paddsw xmm0, xmm6 ;sum - movdqa xmm1, xmm0 - psrldq xmm1, 8 - paddsw xmm0, xmm1 - paddsw xmm0, xmm2 - psrldq xmm2, 8 - paddsw xmm0, xmm5 - psrldq xmm5, 8 - paddsw xmm0, xmm2 - paddsw xmm0, xmm5 - - paddsw xmm0, krd ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movd [rdi], xmm0 -%endm - -%macro GET_FILTERS 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - pshuflw xmm0, xmm7, 0b ;k0 - pshuflw xmm1, xmm7, 01010101b ;k1 - pshuflw xmm2, xmm7, 10101010b ;k2 - pshuflw xmm3, xmm7, 11111111b ;k3 - pshufhw xmm4, xmm7, 0b ;k4 - pshufhw xmm5, xmm7, 01010101b ;k5 - pshufhw xmm6, xmm7, 10101010b ;k6 - pshufhw xmm7, xmm7, 11111111b ;k7 - - punpcklwd xmm0, xmm0 - punpcklwd xmm1, xmm1 - punpcklwd xmm2, xmm2 - punpcklwd xmm3, xmm3 - punpckhwd xmm4, xmm4 - punpckhwd xmm5, xmm5 - punpckhwd xmm6, xmm6 - punpckhwd xmm7, xmm7 - - movdqa k0, xmm0 ;store filter factors on stack - movdqa k1, xmm1 - movdqa k2, xmm2 - movdqa k3, xmm3 - movdqa k4, xmm4 - movdqa k5, xmm5 - movdqa k6, xmm6 - movdqa k7, xmm7 - - movq xmm6, rcx - pshufd xmm6, xmm6, 0 - movdqa krd, xmm6 ;rounding - - pxor xmm7, xmm7 - movdqa zero, xmm7 -%endm - -%macro LOAD_VERT_8 1 - movq xmm0, [rsi + %1] ;0 - movq xmm1, [rsi + rax + %1] ;1 - movq xmm6, [rsi + rdx * 2 + %1] ;6 - lea rsi, [rsi + rax] - movq xmm7, [rsi + rdx * 2 + %1] ;7 - movq xmm2, [rsi + rax + %1] ;2 - movq xmm3, [rsi + rax * 2 + %1] ;3 - movq xmm4, [rsi + rdx + %1] ;4 - movq xmm5, [rsi + rax * 4 + %1] ;5 -%endm - -%macro APPLY_FILTER_8 2 - punpcklbw xmm0, zero - punpcklbw xmm1, zero - punpcklbw xmm6, zero - punpcklbw xmm7, zero - punpcklbw xmm2, zero - punpcklbw xmm5, zero - punpcklbw xmm3, zero - punpcklbw xmm4, zero - - pmullw xmm0, k0 - pmullw xmm1, k1 - pmullw xmm6, k6 - pmullw xmm7, k7 - pmullw xmm2, k2 - pmullw xmm5, k5 - pmullw xmm3, k3 - pmullw xmm4, k4 - - paddsw xmm0, xmm1 - paddsw xmm0, xmm6 - paddsw xmm0, xmm7 - paddsw xmm0, xmm2 - paddsw xmm0, xmm5 - paddsw xmm0, xmm3 - paddsw xmm0, xmm4 - - paddsw xmm0, krd ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte -%if %1 - movq xmm1, [rdi + %2] - pavgb xmm0, xmm1 -%endif - movq [rdi + %2], xmm0 -%endm - -;void vpx_filter_block1d4_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d4_v8_sse2) PRIVATE -sym(vpx_filter_block1d4_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movd xmm0, [rsi] ;load src: row 0 - movd xmm1, [rsi + rax] ;1 - movd xmm6, [rsi + rdx * 2] ;6 - lea rsi, [rsi + rax] - movd xmm7, [rsi + rdx * 2] ;7 - movd xmm2, [rsi + rax] ;2 - movd xmm3, [rsi + rax * 2] ;3 - movd xmm4, [rsi + rdx] ;4 - movd xmm5, [rsi + rax * 4] ;5 - - APPLY_FILTER_4 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d8_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d8_v8_sse2) PRIVATE -sym(vpx_filter_block1d8_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 0, 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d16_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d16_v8_sse2) PRIVATE -sym(vpx_filter_block1d16_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 0, 0 - sub rsi, rax - - LOAD_VERT_8 8 - APPLY_FILTER_8 0, 8 - add rdi, rbx - - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movd xmm0, [rsi] ;load src: row 0 - movd xmm1, [rsi + rax] ;1 - movd xmm6, [rsi + rdx * 2] ;6 - lea rsi, [rsi + rax] - movd xmm7, [rsi + rdx * 2] ;7 - movd xmm2, [rsi + rax] ;2 - movd xmm3, [rsi + rax * 2] ;3 - movd xmm4, [rsi + rdx] ;4 - movd xmm5, [rsi + rax * 4] ;5 - - APPLY_FILTER_4 1 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 1, 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 1, 0 - sub rsi, rax - - LOAD_VERT_8 8 - APPLY_FILTER_8 1, 8 - add rdi, rbx - - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d4_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d4_h8_sse2) PRIVATE -sym(vpx_filter_block1d4_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - movdqa xmm5, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm3, 3 - psrldq xmm5, 5 - psrldq xmm4, 4 - - APPLY_FILTER_4 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d8_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d8_h8_sse2) PRIVATE -sym(vpx_filter_block1d8_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 0, 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d16_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d16_h8_sse2) PRIVATE -sym(vpx_filter_block1d16_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 0, 0 - - movdqu xmm0, [rsi + 5] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 0, 8 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - movdqa xmm5, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm3, 3 - psrldq xmm5, 5 - psrldq xmm4, 4 - - APPLY_FILTER_4 1 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 1, 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 1, 0 - - movdqu xmm0, [rsi + 5] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 1, 8 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm deleted file mode 100644 index d2cb8ea292..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ /dev/null @@ -1,629 +0,0 @@ -; -; Copyright (c) 2015 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pw_64: times 8 dw 64 - -; %define USE_PMULHRSW -; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss -; when using this instruction. -; -; The add order below (based on ffvp9) must be followed to prevent outranges. -; x = k0k1 + k4k5 -; y = k2k3 + k6k7 -; z = signed SAT(x + y) - -SECTION .text -%if ARCH_X86_64 - %define LOCAL_VARS_SIZE 16*4 -%else - %define LOCAL_VARS_SIZE 16*6 -%endif - -%macro SETUP_LOCAL_VARS 0 - ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + - ; pmaddubsw has a higher latency on some platforms, this might be eased by - ; interleaving the instructions. - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - packsswb m4, m4 - ; TODO(slavarnway): multiple pshufb instructions had a higher latency on - ; some platforms. - pshuflw m0, m4, 0b ;k0_k1 - pshuflw m1, m4, 01010101b ;k2_k3 - pshuflw m2, m4, 10101010b ;k4_k5 - pshuflw m3, m4, 11111111b ;k6_k7 - punpcklqdq m0, m0 - punpcklqdq m1, m1 - punpcklqdq m2, m2 - punpcklqdq m3, m3 - mova k0k1, m0 - mova k2k3, m1 - mova k4k5, m2 - mova k6k7, m3 -%if ARCH_X86_64 - %define krd m12 - %define tmp m13 - mova krd, [GLOBAL(pw_64)] -%else - %define tmp [rsp + 16*4] - %define krd [rsp + 16*5] -%if CONFIG_PIC=0 - mova m6, [GLOBAL(pw_64)] -%else - ; build constants without accessing global memory - pcmpeqb m6, m6 ;all ones - psrlw m6, 15 - psllw m6, 6 ;aka pw_64 -%endif - mova krd, m6 -%endif -%endm - -%macro HORIZx4_ROW 2 - mova %2, %1 - punpcklbw %1, %1 - punpckhbw %2, %2 - - mova m3, %2 - palignr %2, %1, 1 - palignr m3, %1, 5 - - pmaddubsw %2, k0k1k4k5 - pmaddubsw m3, k2k3k6k7 - mova m4, %2 ;k0k1 - mova m5, m3 ;k2k3 - psrldq %2, 8 ;k4k5 - psrldq m3, 8 ;k6k7 - paddsw %2, m4 - paddsw m5, m3 - paddsw %2, m5 - paddsw %2, krd - psraw %2, 7 - packuswb %2, %2 -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_HFILTER4 1 -cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - packsswb m4, m4 -%if ARCH_X86_64 - %define k0k1k4k5 m8 - %define k2k3k6k7 m9 - %define krd m10 - %define orig_height r7d - mova krd, [GLOBAL(pw_64)] - pshuflw k0k1k4k5, m4, 0b ;k0_k1 - pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 - pshuflw k2k3k6k7, m4, 01010101b ;k2_k3 - pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7 -%else - %define k0k1k4k5 [rsp + 16*0] - %define k2k3k6k7 [rsp + 16*1] - %define krd [rsp + 16*2] - %define orig_height [rsp + 16*3] - pshuflw m6, m4, 0b ;k0_k1 - pshufhw m6, m6, 10101010b ;k0_k1_k4_k5 - pshuflw m7, m4, 01010101b ;k2_k3 - pshufhw m7, m7, 11111111b ;k2_k3_k6_k7 -%if CONFIG_PIC=0 - mova m1, [GLOBAL(pw_64)] -%else - ; build constants without accessing global memory - pcmpeqb m1, m1 ;all ones - psrlw m1, 15 - psllw m1, 6 ;aka pw_64 -%endif - mova k0k1k4k5, m6 - mova k2k3k6k7, m7 - mova krd, m1 -%endif - mov orig_height, heightd - shr heightd, 1 -.loop: - ;Do two rows at once - movh m0, [srcq - 3] - movh m1, [srcq + 5] - punpcklqdq m0, m1 - mova m1, m0 - movh m2, [srcq + sstrideq - 3] - movh m3, [srcq + sstrideq + 5] - punpcklqdq m2, m3 - mova m3, m2 - punpcklbw m0, m0 - punpckhbw m1, m1 - punpcklbw m2, m2 - punpckhbw m3, m3 - mova m4, m1 - palignr m4, m0, 1 - pmaddubsw m4, k0k1k4k5 - palignr m1, m0, 5 - pmaddubsw m1, k2k3k6k7 - mova m7, m3 - palignr m7, m2, 1 - pmaddubsw m7, k0k1k4k5 - palignr m3, m2, 5 - pmaddubsw m3, k2k3k6k7 - mova m0, m4 ;k0k1 - mova m5, m1 ;k2k3 - mova m2, m7 ;k0k1 upper - psrldq m4, 8 ;k4k5 - psrldq m1, 8 ;k6k7 - paddsw m4, m0 - paddsw m5, m1 - mova m1, m3 ;k2k3 upper - psrldq m7, 8 ;k4k5 upper - psrldq m3, 8 ;k6k7 upper - paddsw m7, m2 - paddsw m4, m5 - paddsw m1, m3 - paddsw m7, m1 - paddsw m4, krd - psraw m4, 7 - packuswb m4, m4 - paddsw m7, krd - psraw m7, 7 - packuswb m7, m7 - -%ifidn %1, h8_avg - movd m0, [dstq] - pavgb m4, m0 - movd m2, [dstq + dstrideq] - pavgb m7, m2 -%endif - movd [dstq], m4 - movd [dstq + dstrideq], m7 - - lea srcq, [srcq + sstrideq ] - prefetcht0 [srcq + 4 * sstrideq - 3] - lea srcq, [srcq + sstrideq ] - lea dstq, [dstq + 2 * dstrideq ] - prefetcht0 [srcq + 2 * sstrideq - 3] - - dec heightd - jnz .loop - - ; Do last row if output_height is odd - mov heightd, orig_height - and heightd, 1 - je .done - - movh m0, [srcq - 3] ; load src - movh m1, [srcq + 5] - punpcklqdq m0, m1 - - HORIZx4_ROW m0, m1 -%ifidn %1, h8_avg - movd m0, [dstq] - pavgb m1, m0 -%endif - movd [dstq], m1 -.done - RET -%endm - -%macro HORIZx8_ROW 5 - mova %2, %1 - punpcklbw %1, %1 - punpckhbw %2, %2 - - mova %3, %2 - mova %4, %2 - mova %5, %2 - - palignr %2, %1, 1 - palignr %3, %1, 5 - palignr %4, %1, 9 - palignr %5, %1, 13 - - pmaddubsw %2, k0k1 - pmaddubsw %3, k2k3 - pmaddubsw %4, k4k5 - pmaddubsw %5, k6k7 - paddsw %2, %4 - paddsw %5, %3 - paddsw %2, %5 - paddsw %2, krd - psraw %2, 7 - packuswb %2, %2 - SWAP %1, %2 -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_HFILTER8 1 -cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define orig_height r7d -%else - %define orig_height heightmp -%endif - mov orig_height, heightd - shr heightd, 1 - -.loop: - movh m0, [srcq - 3] - movh m3, [srcq + 5] - movh m4, [srcq + sstrideq - 3] - movh m7, [srcq + sstrideq + 5] - punpcklqdq m0, m3 - mova m1, m0 - punpcklbw m0, m0 - punpckhbw m1, m1 - mova m5, m1 - palignr m5, m0, 13 - pmaddubsw m5, k6k7 - mova m2, m1 - mova m3, m1 - palignr m1, m0, 1 - pmaddubsw m1, k0k1 - punpcklqdq m4, m7 - mova m6, m4 - punpcklbw m4, m4 - palignr m2, m0, 5 - punpckhbw m6, m6 - palignr m3, m0, 9 - mova m7, m6 - pmaddubsw m2, k2k3 - pmaddubsw m3, k4k5 - - palignr m7, m4, 13 - mova m0, m6 - palignr m0, m4, 5 - pmaddubsw m7, k6k7 - paddsw m1, m3 - paddsw m2, m5 - paddsw m1, m2 - mova m5, m6 - palignr m6, m4, 1 - pmaddubsw m0, k2k3 - pmaddubsw m6, k0k1 - palignr m5, m4, 9 - paddsw m1, krd - pmaddubsw m5, k4k5 - psraw m1, 7 - paddsw m0, m7 -%ifidn %1, h8_avg - movh m7, [dstq] - movh m2, [dstq + dstrideq] -%endif - packuswb m1, m1 - paddsw m6, m5 - paddsw m6, m0 - paddsw m6, krd - psraw m6, 7 - packuswb m6, m6 -%ifidn %1, h8_avg - pavgb m1, m7 - pavgb m6, m2 -%endif - movh [dstq], m1 - movh [dstq + dstrideq], m6 - - lea srcq, [srcq + sstrideq ] - prefetcht0 [srcq + 4 * sstrideq - 3] - lea srcq, [srcq + sstrideq ] - lea dstq, [dstq + 2 * dstrideq ] - prefetcht0 [srcq + 2 * sstrideq - 3] - dec heightd - jnz .loop - - ;Do last row if output_height is odd - mov heightd, orig_height - and heightd, 1 - je .done - - movh m0, [srcq - 3] - movh m3, [srcq + 5] - punpcklqdq m0, m3 - - HORIZx8_ROW m0, m1, m2, m3, m4 - -%ifidn %1, h8_avg - movh m1, [dstq] - pavgb m0, m1 -%endif - movh [dstq], m0 -.done: - RET -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_HFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -.loop: - prefetcht0 [srcq + 2 * sstrideq -3] - - movh m0, [srcq - 3] - movh m4, [srcq + 5] - movh m6, [srcq + 13] - punpcklqdq m0, m4 - mova m7, m0 - punpckhbw m0, m0 - mova m1, m0 - punpcklqdq m4, m6 - mova m3, m0 - punpcklbw m7, m7 - - palignr m3, m7, 13 - mova m2, m0 - pmaddubsw m3, k6k7 - palignr m0, m7, 1 - pmaddubsw m0, k0k1 - palignr m1, m7, 5 - pmaddubsw m1, k2k3 - palignr m2, m7, 9 - pmaddubsw m2, k4k5 - paddsw m1, m3 - mova m3, m4 - punpckhbw m4, m4 - mova m5, m4 - punpcklbw m3, m3 - mova m7, m4 - palignr m5, m3, 5 - mova m6, m4 - palignr m4, m3, 1 - pmaddubsw m4, k0k1 - pmaddubsw m5, k2k3 - palignr m6, m3, 9 - pmaddubsw m6, k4k5 - palignr m7, m3, 13 - pmaddubsw m7, k6k7 - paddsw m0, m2 - paddsw m0, m1 -%ifidn %1, h8_avg - mova m1, [dstq] -%endif - paddsw m4, m6 - paddsw m5, m7 - paddsw m4, m5 - paddsw m0, krd - paddsw m4, krd - psraw m0, 7 - psraw m4, 7 - packuswb m0, m4 -%ifidn %1, h8_avg - pavgb m0, m1 -%endif - lea srcq, [srcq + sstrideq] - mova [dstq], m0 - lea dstq, [dstq + dstrideq] - dec heightd - jnz .loop - RET -%endm - -INIT_XMM ssse3 -SUBPIX_HFILTER16 h8 -SUBPIX_HFILTER16 h8_avg -SUBPIX_HFILTER8 h8 -SUBPIX_HFILTER8 h8_avg -SUBPIX_HFILTER4 h8 -SUBPIX_HFILTER4 h8_avg - -;------------------------------------------------------------------------------- -%macro SUBPIX_VFILTER 2 -cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define src1q r7 - %define sstride6q r8 - %define dst_stride dstrideq -%else - %define src1q filterq - %define sstride6q dstrideq - %define dst_stride dstridemp -%endif - mov src1q, srcq - add src1q, sstrideq - lea sstride6q, [sstrideq + sstrideq * 4] - add sstride6q, sstrideq ;pitch * 6 - -%ifidn %2, 8 - %define movx movh -%else - %define movx movd -%endif -.loop: - movx m0, [srcq ] ;A - movx m1, [srcq + sstrideq ] ;B - punpcklbw m0, m1 ;A B - movx m2, [srcq + sstrideq * 2 ] ;C - pmaddubsw m0, k0k1 - mova m6, m2 - movx m3, [src1q + sstrideq * 2] ;D - punpcklbw m2, m3 ;C D - pmaddubsw m2, k2k3 - movx m4, [srcq + sstrideq * 4 ] ;E - mova m7, m4 - movx m5, [src1q + sstrideq * 4] ;F - punpcklbw m4, m5 ;E F - pmaddubsw m4, k4k5 - punpcklbw m1, m6 ;A B next iter - movx m6, [srcq + sstride6q ] ;G - punpcklbw m5, m6 ;E F next iter - punpcklbw m3, m7 ;C D next iter - pmaddubsw m5, k4k5 - movx m7, [src1q + sstride6q ] ;H - punpcklbw m6, m7 ;G H - pmaddubsw m6, k6k7 - pmaddubsw m3, k2k3 - pmaddubsw m1, k0k1 - paddsw m0, m4 - paddsw m2, m6 - movx m6, [srcq + sstrideq * 8 ] ;H next iter - punpcklbw m7, m6 - pmaddubsw m7, k6k7 - paddsw m0, m2 - paddsw m0, krd - psraw m0, 7 - paddsw m1, m5 - packuswb m0, m0 - - paddsw m3, m7 - paddsw m1, m3 - paddsw m1, krd - psraw m1, 7 - lea srcq, [srcq + sstrideq * 2 ] - lea src1q, [src1q + sstrideq * 2] - packuswb m1, m1 - -%ifidn %1, v8_avg - movx m2, [dstq] - pavgb m0, m2 -%endif - movx [dstq], m0 - add dstq, dst_stride -%ifidn %1, v8_avg - movx m3, [dstq] - pavgb m1, m3 -%endif - movx [dstq], m1 - add dstq, dst_stride - sub heightd, 2 - cmp heightd, 1 - jg .loop - - cmp heightd, 0 - je .done - - movx m0, [srcq ] ;A - movx m1, [srcq + sstrideq ] ;B - movx m6, [srcq + sstride6q ] ;G - punpcklbw m0, m1 ;A B - movx m7, [src1q + sstride6q ] ;H - pmaddubsw m0, k0k1 - movx m2, [srcq + sstrideq * 2 ] ;C - punpcklbw m6, m7 ;G H - movx m3, [src1q + sstrideq * 2] ;D - pmaddubsw m6, k6k7 - movx m4, [srcq + sstrideq * 4 ] ;E - punpcklbw m2, m3 ;C D - movx m5, [src1q + sstrideq * 4] ;F - punpcklbw m4, m5 ;E F - pmaddubsw m2, k2k3 - pmaddubsw m4, k4k5 - paddsw m2, m6 - paddsw m0, m4 - paddsw m0, m2 - paddsw m0, krd - psraw m0, 7 - packuswb m0, m0 -%ifidn %1, v8_avg - movx m1, [dstq] - pavgb m0, m1 -%endif - movx [dstq], m0 -.done: - RET -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_VFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define src1q r7 - %define sstride6q r8 - %define dst_stride dstrideq -%else - %define src1q filterq - %define sstride6q dstrideq - %define dst_stride dstridemp -%endif - mov src1q, srcq - add src1q, sstrideq - lea sstride6q, [sstrideq + sstrideq * 4] - add sstride6q, sstrideq ;pitch * 6 - -.loop: - movh m0, [srcq ] ;A - movh m1, [srcq + sstrideq ] ;B - movh m2, [srcq + sstrideq * 2 ] ;C - movh m3, [src1q + sstrideq * 2] ;D - movh m4, [srcq + sstrideq * 4 ] ;E - movh m5, [src1q + sstrideq * 4] ;F - - punpcklbw m0, m1 ;A B - movh m6, [srcq + sstride6q] ;G - punpcklbw m2, m3 ;C D - movh m7, [src1q + sstride6q] ;H - punpcklbw m4, m5 ;E F - pmaddubsw m0, k0k1 - movh m3, [srcq + 8] ;A - pmaddubsw m2, k2k3 - punpcklbw m6, m7 ;G H - movh m5, [srcq + sstrideq + 8] ;B - pmaddubsw m4, k4k5 - punpcklbw m3, m5 ;A B - movh m7, [srcq + sstrideq * 2 + 8] ;C - pmaddubsw m6, k6k7 - movh m5, [src1q + sstrideq * 2 + 8] ;D - punpcklbw m7, m5 ;C D - paddsw m2, m6 - pmaddubsw m3, k0k1 - movh m1, [srcq + sstrideq * 4 + 8] ;E - paddsw m0, m4 - pmaddubsw m7, k2k3 - movh m6, [src1q + sstrideq * 4 + 8] ;F - punpcklbw m1, m6 ;E F - paddsw m0, m2 - paddsw m0, krd - movh m2, [srcq + sstride6q + 8] ;G - pmaddubsw m1, k4k5 - movh m5, [src1q + sstride6q + 8] ;H - psraw m0, 7 - punpcklbw m2, m5 ;G H - pmaddubsw m2, k6k7 -%ifidn %1, v8_avg - mova m4, [dstq] -%endif - movh [dstq], m0 - paddsw m7, m2 - paddsw m3, m1 - paddsw m3, m7 - paddsw m3, krd - psraw m3, 7 - packuswb m0, m3 - - add srcq, sstrideq - add src1q, sstrideq -%ifidn %1, v8_avg - pavgb m0, m4 -%endif - mova [dstq], m0 - add dstq, dst_stride - dec heightd - jnz .loop - RET -%endm - -INIT_XMM ssse3 -SUBPIX_VFILTER16 v8 -SUBPIX_VFILTER16 v8_avg -SUBPIX_VFILTER v8, 8 -SUBPIX_VFILTER v8_avg, 8 -SUBPIX_VFILTER v8, 4 -SUBPIX_VFILTER v8_avg, 4 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm deleted file mode 100644 index a378dd0402..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm +++ /dev/null @@ -1,448 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm3, [rdx] ;load filters - pshuflw xmm4, xmm3, 11111111b ;k3 - psrldq xmm3, 8 - pshuflw xmm3, xmm3, 0b ;k4 - punpcklqdq xmm4, xmm3 ;k3k4 - - movq xmm3, rcx ;rounding - pshufd xmm3, xmm3, 0 - - pxor xmm2, xmm2 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_4 1 - - punpckldq xmm0, xmm1 ;two row in one register - punpcklbw xmm0, xmm2 ;unpack to word - pmullw xmm0, xmm4 ;multiply the filter factors - - movdqa xmm1, xmm0 - psrldq xmm1, 8 - paddsw xmm0, xmm1 - - paddsw xmm0, xmm3 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - - movd [rdi], xmm0 - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - - pshuflw xmm6, xmm7, 11111111b ;k3 - pshufhw xmm7, xmm7, 0b ;k4 - punpcklwd xmm6, xmm6 - punpckhwd xmm7, xmm7 - - movq xmm4, rcx ;rounding - pshufd xmm4, xmm4, 0 - - pxor xmm5, xmm5 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_8 1 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - - pmullw xmm0, xmm6 - pmullw xmm1, xmm7 - paddsw xmm0, xmm1 - paddsw xmm0, xmm4 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro APPLY_FILTER_16 1 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - punpckhbw xmm3, xmm5 - - pmullw xmm0, xmm6 - pmullw xmm1, xmm7 - pmullw xmm2, xmm6 - pmullw xmm3, xmm7 - - paddsw xmm0, xmm1 - paddsw xmm2, xmm3 - - paddsw xmm0, xmm4 ;rounding - paddsw xmm2, xmm4 - psraw xmm0, 7 ;shift - psraw xmm2, 7 - packuswb xmm0, xmm2 ;pack back to byte -%if %1 - movdqu xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -global sym(vpx_filter_block1d4_v2_sse2) PRIVATE -sym(vpx_filter_block1d4_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_sse2) PRIVATE -sym(vpx_filter_block1d8_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_sse2) PRIVATE -sym(vpx_filter_block1d16_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_v2_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_sse2) PRIVATE -sym(vpx_filter_block1d4_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_sse2) PRIVATE -sym(vpx_filter_block1d8_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_sse2) PRIVATE -sym(vpx_filter_block1d16_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm deleted file mode 100644 index 3c8cfd2253..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm +++ /dev/null @@ -1,422 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm3, [rdx] ;load filters - psrldq xmm3, 6 - packsswb xmm3, xmm3 - pshuflw xmm3, xmm3, 0b ;k3_k4 - - movq xmm2, rcx ;rounding - pshufd xmm2, xmm2, 0 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_4 1 - punpcklbw xmm0, xmm1 - pmaddubsw xmm0, xmm3 - - paddsw xmm0, xmm2 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movd [rdi], xmm0 - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - psrldq xmm7, 6 - packsswb xmm7, xmm7 - pshuflw xmm7, xmm7, 0b ;k3_k4 - punpcklwd xmm7, xmm7 - - movq xmm6, rcx ;rounding - pshufd xmm6, xmm6, 0 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_8 1 - punpcklbw xmm0, xmm1 - pmaddubsw xmm0, xmm7 - - paddsw xmm0, xmm6 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte - -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro APPLY_FILTER_16 1 - punpcklbw xmm0, xmm1 - punpckhbw xmm2, xmm1 - pmaddubsw xmm0, xmm7 - pmaddubsw xmm2, xmm7 - - paddsw xmm0, xmm6 ;rounding - paddsw xmm2, xmm6 - psraw xmm0, 7 ;shift - psraw xmm2, 7 - packuswb xmm0, xmm2 ;pack back to byte - -%if %1 - movdqu xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -global sym(vpx_filter_block1d4_v2_ssse3) PRIVATE -sym(vpx_filter_block1d4_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE -sym(vpx_filter_block1d8_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE -sym(vpx_filter_block1d16_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d4_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d8_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d16_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE -sym(vpx_filter_block1d4_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE -sym(vpx_filter_block1d8_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE -sym(vpx_filter_block1d16_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d4_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d8_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d16_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vpx_dsp_rtcd.h b/thirdparty/libvpx/vpx_dsp_rtcd.h deleted file mode 100644 index 4d5ad89533..0000000000 --- a/thirdparty/libvpx/vpx_dsp_rtcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - #include "rtcd/vpx_dsp_rtcd_x86.h" -#elif defined(WEBM_ARMASM) && ARCH_ARM - #include "rtcd/vpx_dsp_rtcd_arm.h" -#else - #include "rtcd/vpx_dsp_rtcd_c.h" -#endif diff --git a/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h b/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h deleted file mode 100644 index c4dd78550f..0000000000 --- a/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ -#define VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ -#include "./vpx_config.h" - -#define ADDRESS_STORAGE_SIZE sizeof(size_t) - -#ifndef DEFAULT_ALIGNMENT -# if defined(VXWORKS) -# define DEFAULT_ALIGNMENT 32 /*default addr alignment to use in -calls to vpx_* functions other -than vpx_memalign*/ -# else -# define DEFAULT_ALIGNMENT (2 * sizeof(void*)) /* NOLINT */ -# endif -#endif - -/*returns an addr aligned to the byte boundary specified by align*/ -#define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) - -#endif // VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ diff --git a/thirdparty/libvpx/vpx_mem/vpx_mem.c b/thirdparty/libvpx/vpx_mem/vpx_mem.c deleted file mode 100644 index b261fc0da1..0000000000 --- a/thirdparty/libvpx/vpx_mem/vpx_mem.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_mem.h" -#include -#include -#include -#include "include/vpx_mem_intrnl.h" -#include "vpx/vpx_integer.h" - -void *vpx_memalign(size_t align, size_t size) { - void *addr, - * x = NULL; - - addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE); - - if (addr) { - x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)addr; - } - - return x; -} - -void *vpx_malloc(size_t size) { - return vpx_memalign(DEFAULT_ALIGNMENT, size); -} - -void *vpx_calloc(size_t num, size_t size) { - void *x; - - x = vpx_memalign(DEFAULT_ALIGNMENT, num * size); - - if (x) - memset(x, 0, num * size); - - return x; -} - -void *vpx_realloc(void *memblk, size_t size) { - void *addr, - * new_addr = NULL; - int align = DEFAULT_ALIGNMENT; - - /* - The realloc() function changes the size of the object pointed to by - ptr to the size specified by size, and returns a pointer to the - possibly moved block. The contents are unchanged up to the lesser - of the new and old sizes. If ptr is null, realloc() behaves like - malloc() for the specified size. If size is zero (0) and ptr is - not a null pointer, the object pointed to is freed. - */ - if (!memblk) - new_addr = vpx_malloc(size); - else if (!size) - vpx_free(memblk); - else { - addr = (void *)(((size_t *)memblk)[-1]); - memblk = NULL; - - new_addr = realloc(addr, size + align + ADDRESS_STORAGE_SIZE); - - if (new_addr) { - addr = new_addr; - new_addr = (void *)(((size_t) - ((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) & - (size_t) - align); - /* save the actual malloc address */ - ((size_t *)new_addr)[-1] = (size_t)addr; - } - } - - return new_addr; -} - -void vpx_free(void *memblk) { - if (memblk) { - void *addr = (void *)(((size_t *)memblk)[-1]); - free(addr); - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void *vpx_memset16(void *dest, int val, size_t length) { - size_t i; - uint16_t *dest16 = (uint16_t *)dest; - for (i = 0; i < length; i++) - *dest16++ = val; - return dest; -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_mem/vpx_mem.h b/thirdparty/libvpx/vpx_mem/vpx_mem.h deleted file mode 100644 index a006e0f00b..0000000000 --- a/thirdparty/libvpx/vpx_mem/vpx_mem.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_MEM_VPX_MEM_H_ -#define VPX_MEM_VPX_MEM_H_ - -#include "vpx_config.h" -#if defined(__uClinux__) -# include -#endif - -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - - void *vpx_memalign(size_t align, size_t size); - void *vpx_malloc(size_t size); - void *vpx_calloc(size_t num, size_t size); - void *vpx_realloc(void *memblk, size_t size); - void vpx_free(void *memblk); - -#if CONFIG_VP9_HIGHBITDEPTH - void *vpx_memset16(void *dest, int val, size_t length); -#endif - -#include - -#ifdef VPX_MEM_PLTFRM -# include VPX_MEM_PLTFRM -#endif - -#if defined(__cplusplus) -} -#endif - -#endif // VPX_MEM_VPX_MEM_H_ diff --git a/thirdparty/libvpx/vpx_ports/arm.h b/thirdparty/libvpx/vpx_ports/arm.h deleted file mode 100644 index 42c98f5a83..0000000000 --- a/thirdparty/libvpx/vpx_ports/arm.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_ARM_H_ -#define VPX_PORTS_ARM_H_ -#include -#include "vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*ARMv5TE "Enhanced DSP" instructions.*/ -#define HAS_EDSP 0x01 -/*ARMv6 "Parallel" or "Media" instructions.*/ -#define HAS_MEDIA 0x02 -/*ARMv7 optional NEON instructions.*/ -#define HAS_NEON 0x04 - -int arm_cpu_caps(void); - -// Earlier gcc compilers have issues with some neon intrinsics -#if !defined(__clang__) && defined(__GNUC__) && \ - __GNUC__ == 4 && __GNUC_MINOR__ <= 6 -#define VPX_INCOMPATIBLE_GCC -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_PORTS_ARM_H_ - diff --git a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c deleted file mode 100644 index 7eb74a7dc9..0000000000 --- a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include "vpx_ports/arm.h" -#include "./vpx_config.h" - -#ifdef WINAPI_FAMILY -#include -#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -#define getenv(x) NULL -#endif -#endif - -static int arm_cpu_env_flags(int *flags) { - char *env; - env = getenv("VPX_SIMD_CAPS"); - if (env && *env) { - *flags = (int)strtol(env, NULL, 0); - return 0; - } - *flags = 0; - return -1; -} - -static int arm_cpu_env_mask(void) { - char *env; - env = getenv("VPX_SIMD_CAPS_MASK"); - return env && *env ? (int)strtol(env, NULL, 0) : ~0; -} - -#if !CONFIG_RUNTIME_CPU_DETECT - #error "CONFIG_RUNTIME_CPU_DETECT should be enabled!" -#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ -/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ -#define WIN32_LEAN_AND_MEAN -#define WIN32_EXTRA_LEAN -#include - -int arm_cpu_caps(void) { - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); - /* MSVC has no inline __asm support for ARM, but it does let you __emit - * instructions via their assembled hex code. - * All of these instructions should be essentially nops. - */ -#if HAVE_MEDIA - if (mask & HAS_MEDIA) { - __try { - /*SHADD8 r3,r3,r3*/ - __emit(0xE6333F93); - flags |= HAS_MEDIA; - } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - /*Ignore exception.*/ - } - } -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - if (mask &HAS_NEON) { - __try { - /*VORR q0,q0,q0*/ - __emit(0xF2200150); - flags |= HAS_NEON; - } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - /*Ignore exception.*/ - } - } -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} - -#elif defined(__ANDROID__) /* end _MSC_VER */ -#include - -int arm_cpu_caps(void) { - int flags; - int mask; - uint64_t features; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); - features = android_getCpuFeatures(); - -#if HAVE_MEDIA - flags |= HAS_MEDIA; -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - flags |= HAS_NEON; -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} - -#elif defined(__linux__) /* end __ANDROID__ */ - -#include - -int arm_cpu_caps(void) { - FILE *fin; - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); - /* Reading /proc/self/auxv would be easier, but that doesn't work reliably - * on Android. - * This also means that detection will fail in Scratchbox. - */ - fin = fopen("/proc/cpuinfo", "r"); - if (fin != NULL) { - /* 512 should be enough for anybody (it's even enough for all the flags - * that x86 has accumulated... so far). - */ - char buf[512]; - while (fgets(buf, 511, fin) != NULL) { -#if HAVE_NEON || HAVE_NEON_ASM - if (memcmp(buf, "Features", 8) == 0) { - char *p; - p = strstr(buf, " neon"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { - flags |= HAS_NEON; - } - } -#endif /* HAVE_NEON || HAVE_NEON_ASM */ -#if HAVE_MEDIA - if (memcmp(buf, "CPU architecture:", 17) == 0) { - int version; - version = atoi(buf + 17); - if (version >= 6) { - flags |= HAS_MEDIA; - } - } -#endif /* HAVE_MEDIA */ - } - fclose(fin); - } - return flags & mask; -} -#else /* end __linux__ */ -int arm_cpu_caps(void) { - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); -#if HAVE_MEDIA - flags |= HAS_MEDIA; -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - flags |= HAS_NEON; -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} -#warning "ARM run-time CPU detection is disabled for this platform..." -#endif diff --git a/thirdparty/libvpx/vpx_ports/bitops.h b/thirdparty/libvpx/vpx_ports/bitops.h deleted file mode 100644 index 84ff3659fe..0000000000 --- a/thirdparty/libvpx/vpx_ports/bitops.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_BITOPS_H_ -#define VPX_PORTS_BITOPS_H_ - -#include - -#include "vpx_ports/msvc.h" - -#ifdef _MSC_VER -# include // the ceil() definition must precede intrin.h -# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) -# include -# define USE_MSC_INTRINSICS -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// These versions of get_msb() are only valid when n != 0 because all -// of the optimized versions are undefined when n == 0: -// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html - -// use GNU builtins where available. -#if defined(__GNUC__) && \ - ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) -static INLINE int get_msb(unsigned int n) { - assert(n != 0); - return 31 ^ __builtin_clz(n); -} -#elif defined(USE_MSC_INTRINSICS) -#pragma intrinsic(_BitScanReverse) - -static INLINE int get_msb(unsigned int n) { - unsigned long first_set_bit; - assert(n != 0); - _BitScanReverse(&first_set_bit, n); - return first_set_bit; -} -#undef USE_MSC_INTRINSICS -#else -// Returns (int)floor(log2(n)). n must be > 0. -static INLINE int get_msb(unsigned int n) { - int log = 0; - unsigned int value = n; - int i; - - assert(n != 0); - - for (i = 4; i >= 0; --i) { - const int shift = (1 << i); - const unsigned int x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - return log; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_PORTS_BITOPS_H_ diff --git a/thirdparty/libvpx/vpx_ports/config.h b/thirdparty/libvpx/vpx_ports/config.h deleted file mode 100644 index 3c1ab99f4a..0000000000 --- a/thirdparty/libvpx/vpx_ports/config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_CONFIG_H_ -#define VPX_PORTS_CONFIG_H_ - -#include "vpx_config.h" - -#endif // VPX_PORTS_CONFIG_H_ diff --git a/thirdparty/libvpx/vpx_ports/emmintrin_compat.h b/thirdparty/libvpx/vpx_ports/emmintrin_compat.h deleted file mode 100644 index 16176383d2..0000000000 --- a/thirdparty/libvpx/vpx_ports/emmintrin_compat.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H_ -#define VPX_PORTS_EMMINTRIN_COMPAT_H_ - -#if defined(__GNUC__) && __GNUC__ < 4 -/* From emmintrin.h (gcc 4.5.3) */ -/* Casts between various SP, DP, INT vector types. Note that these do no - conversion of values, they just change the type. */ -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castpd_ps(__m128d __A) -{ - return (__m128) __A; -} - -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castpd_si128(__m128d __A) -{ - return (__m128i) __A; -} - -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castps_pd(__m128 __A) -{ - return (__m128d) __A; -} - -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castps_si128(__m128 __A) -{ - return (__m128i) __A; -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castsi128_ps(__m128i __A) -{ - return (__m128) __A; -} - -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castsi128_pd(__m128i __A) -{ - return (__m128d) __A; -} -#endif - -#endif // VPX_PORTS_EMMINTRIN_COMPAT_H_ diff --git a/thirdparty/libvpx/vpx_ports/emms.asm b/thirdparty/libvpx/vpx_ports/emms.asm deleted file mode 100644 index db8da28737..0000000000 --- a/thirdparty/libvpx/vpx_ports/emms.asm +++ /dev/null @@ -1,38 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -section .text -global sym(vpx_reset_mmx_state) PRIVATE -sym(vpx_reset_mmx_state): - emms - ret - - -%if LIBVPX_YASM_WIN64 -global sym(vpx_winx64_fldcw) PRIVATE -sym(vpx_winx64_fldcw): - sub rsp, 8 - mov [rsp], rcx ; win x64 specific - fldcw [rsp] - add rsp, 8 - ret - - -global sym(vpx_winx64_fstcw) PRIVATE -sym(vpx_winx64_fstcw): - sub rsp, 8 - fstcw [rsp] - mov rax, [rsp] - add rsp, 8 - ret -%endif diff --git a/thirdparty/libvpx/vpx_ports/mem.h b/thirdparty/libvpx/vpx_ports/mem.h deleted file mode 100644 index 7502f90632..0000000000 --- a/thirdparty/libvpx/vpx_ports/mem.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_MEM_H_ -#define VPX_PORTS_MEM_H_ - -#include "vpx_config.h" -#include "vpx/vpx_integer.h" - -#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C) -#define DECLARE_ALIGNED(n,typ,val) typ val __attribute__ ((aligned (n))) -#elif defined(_MSC_VER) -#define DECLARE_ALIGNED(n,typ,val) __declspec(align(n)) typ val -#else -#warning No alignment directives known for this compiler. -#define DECLARE_ALIGNED(n,typ,val) typ val -#endif - -/* Indicates that the usage of the specified variable has been audited to assure - * that it's safe to use uninitialized. Silences 'may be used uninitialized' - * warnings on gcc. - */ -#if defined(__GNUC__) && __GNUC__ -#define UNINITIALIZED_IS_SAFE(x) x=x -#else -#define UNINITIALIZED_IS_SAFE(x) x -#endif - -#if HAVE_NEON && defined(_MSC_VER) -#define __builtin_prefetch(x) -#endif - -/* Shift down with rounding */ -#define ROUND_POWER_OF_TWO(value, n) \ - (((value) + (1 << ((n) - 1))) >> (n)) - -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) - -#if CONFIG_VP9_HIGHBITDEPTH -#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) -#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1)) -#endif // CONFIG_VP9_HIGHBITDEPTH - -#endif // VPX_PORTS_MEM_H_ diff --git a/thirdparty/libvpx/vpx_ports/mem_ops.h b/thirdparty/libvpx/vpx_ports/mem_ops.h deleted file mode 100644 index 620df31b22..0000000000 --- a/thirdparty/libvpx/vpx_ports/mem_ops.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_MEM_OPS_H_ -#define VPX_PORTS_MEM_OPS_H_ - -/* \file - * \brief Provides portable memory access primitives - * - * This function provides portable primitives for getting and setting of - * signed and unsigned integers in 16, 24, and 32 bit sizes. The operations - * can be performed on unaligned data regardless of hardware support for - * unaligned accesses. - * - * The type used to pass the integral values may be changed by defining - * MEM_VALUE_T with the appropriate type. The type given must be an integral - * numeric type. - * - * The actual functions instantiated have the MEM_VALUE_T type name pasted - * on to the symbol name. This allows the developer to instantiate these - * operations for multiple types within the same translation unit. This is - * of somewhat questionable utility, but the capability exists nonetheless. - * Users not making use of this functionality should call the functions - * without the type name appended, and the preprocessor will take care of - * it. - * - * NOTE: This code is not supported on platforms where char > 1 octet ATM. - */ - -#ifndef MAU_T -/* Minimum Access Unit for this target */ -#define MAU_T unsigned char -#endif - -#ifndef MEM_VALUE_T -#define MEM_VALUE_T int -#endif - -#undef MEM_VALUE_T_SZ_BITS -#define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3) - -#undef mem_ops_wrap_symbol -#define mem_ops_wrap_symbol(fn) mem_ops_wrap_symbol2(fn, MEM_VALUE_T) -#undef mem_ops_wrap_symbol2 -#define mem_ops_wrap_symbol2(fn,typ) mem_ops_wrap_symbol3(fn,typ) -#undef mem_ops_wrap_symbol3 -#define mem_ops_wrap_symbol3(fn,typ) fn##_as_##typ - -/* - * Include aligned access routines - */ -#define INCLUDED_BY_MEM_OPS_H -#include "mem_ops_aligned.h" -#undef INCLUDED_BY_MEM_OPS_H - -#undef mem_get_be16 -#define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16) -static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 8; - val |= mem[1]; - return val; -} - -#undef mem_get_be24 -#define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24) -static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 16; - val |= mem[1] << 8; - val |= mem[2]; - return val; -} - -#undef mem_get_be32 -#define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32) -static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = ((unsigned MEM_VALUE_T)mem[0]) << 24; - val |= mem[1] << 16; - val |= mem[2] << 8; - val |= mem[3]; - return val; -} - -#undef mem_get_le16 -#define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16) -static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[1] << 8; - val |= mem[0]; - return val; -} - -#undef mem_get_le24 -#define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24) -static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; -} - -#undef mem_get_le32 -#define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32) -static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = ((unsigned MEM_VALUE_T)mem[3]) << 24; - val |= mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; -} - -#define mem_get_s_generic(end,sz) \ - static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ - const MAU_T *mem = (const MAU_T*)vmem;\ - signed MEM_VALUE_T val = mem_get_##end##sz(mem);\ - return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\ - } - -#undef mem_get_sbe16 -#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) -mem_get_s_generic(be, 16) - -#undef mem_get_sbe24 -#define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) -mem_get_s_generic(be, 24) - -#undef mem_get_sbe32 -#define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) -mem_get_s_generic(be, 32) - -#undef mem_get_sle16 -#define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) -mem_get_s_generic(le, 16) - -#undef mem_get_sle24 -#define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) -mem_get_s_generic(le, 24) - -#undef mem_get_sle32 -#define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) -mem_get_s_generic(le, 32) - -#undef mem_put_be16 -#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) -static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 8) & 0xff); - mem[1] = (MAU_T)((val >> 0) & 0xff); -} - -#undef mem_put_be24 -#define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24) -static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 16) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); - mem[2] = (MAU_T)((val >> 0) & 0xff); -} - -#undef mem_put_be32 -#define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32) -static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 24) & 0xff); - mem[1] = (MAU_T)((val >> 16) & 0xff); - mem[2] = (MAU_T)((val >> 8) & 0xff); - mem[3] = (MAU_T)((val >> 0) & 0xff); -} - -#undef mem_put_le16 -#define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16) -static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 0) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); -} - -#undef mem_put_le24 -#define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24) -static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 0) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); - mem[2] = (MAU_T)((val >> 16) & 0xff); -} - -#undef mem_put_le32 -#define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32) -static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 0) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); - mem[2] = (MAU_T)((val >> 16) & 0xff); - mem[3] = (MAU_T)((val >> 24) & 0xff); -} - -#endif // VPX_PORTS_MEM_OPS_H_ diff --git a/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h b/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h deleted file mode 100644 index 46f61738ba..0000000000 --- a/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_MEM_OPS_ALIGNED_H_ -#define VPX_PORTS_MEM_OPS_ALIGNED_H_ - -#include "vpx/vpx_integer.h" - -/* \file - * \brief Provides portable memory access primitives for operating on aligned - * data - * - * This file is split from mem_ops.h for easier maintenance. See mem_ops.h - * for a more detailed description of these primitives. - */ -#ifndef INCLUDED_BY_MEM_OPS_H -#error Include mem_ops.h, not mem_ops_aligned.h directly. -#endif - -/* Architectures that provide instructions for doing this byte swapping - * could redefine these macros. - */ -#define swap_endian_16(val,raw) do {\ - val = (uint16_t)(((raw>>8) & 0x00ff) \ - | ((raw<<8) & 0xff00));\ - } while(0) -#define swap_endian_32(val,raw) do {\ - val = ((raw>>24) & 0x000000ff) \ - | ((raw>>8) & 0x0000ff00) \ - | ((raw<<8) & 0x00ff0000) \ - | ((raw<<24) & 0xff000000); \ - } while(0) -#define swap_endian_16_se(val,raw) do {\ - swap_endian_16(val,raw);\ - val = ((val << 16) >> 16);\ - } while(0) -#define swap_endian_32_se(val,raw) swap_endian_32(val,raw) - -#define mem_get_ne_aligned_generic(end,sz) \ - static VPX_INLINE unsigned MEM_VALUE_T \ - mem_get_##end##sz##_aligned(const void *vmem) {\ - const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ - return *mem;\ - } - -#define mem_get_sne_aligned_generic(end,sz) \ - static VPX_INLINE signed MEM_VALUE_T \ - mem_get_s##end##sz##_aligned(const void *vmem) {\ - const int##sz##_t *mem = (const int##sz##_t *)vmem;\ - return *mem;\ - } - -#define mem_get_se_aligned_generic(end,sz) \ - static VPX_INLINE unsigned MEM_VALUE_T \ - mem_get_##end##sz##_aligned(const void *vmem) {\ - const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ - unsigned MEM_VALUE_T val, raw = *mem;\ - swap_endian_##sz(val,raw);\ - return val;\ - } - -#define mem_get_sse_aligned_generic(end,sz) \ - static VPX_INLINE signed MEM_VALUE_T \ - mem_get_s##end##sz##_aligned(const void *vmem) {\ - const int##sz##_t *mem = (const int##sz##_t *)vmem;\ - unsigned MEM_VALUE_T val, raw = *mem;\ - swap_endian_##sz##_se(val,raw);\ - return val;\ - } - -#define mem_put_ne_aligned_generic(end,sz) \ - static VPX_INLINE void \ - mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ - uint##sz##_t *mem = (uint##sz##_t *)vmem;\ - *mem = (uint##sz##_t)val;\ - } - -#define mem_put_se_aligned_generic(end,sz) \ - static VPX_INLINE void \ - mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ - uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\ - swap_endian_##sz(raw,val);\ - *mem = (uint##sz##_t)raw;\ - } - -#include "vpx_config.h" -#if CONFIG_BIG_ENDIAN -#define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be,sz) -#define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be,sz) -#define mem_get_le_aligned_generic(sz) mem_get_se_aligned_generic(le,sz) -#define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le,sz) -#define mem_put_be_aligned_generic(sz) mem_put_ne_aligned_generic(be,sz) -#define mem_put_le_aligned_generic(sz) mem_put_se_aligned_generic(le,sz) -#else -#define mem_get_be_aligned_generic(sz) mem_get_se_aligned_generic(be,sz) -#define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be,sz) -#define mem_get_le_aligned_generic(sz) mem_get_ne_aligned_generic(le,sz) -#define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le,sz) -#define mem_put_be_aligned_generic(sz) mem_put_se_aligned_generic(be,sz) -#define mem_put_le_aligned_generic(sz) mem_put_ne_aligned_generic(le,sz) -#endif - -#undef mem_get_be16_aligned -#define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) -mem_get_be_aligned_generic(16) - -#undef mem_get_be32_aligned -#define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) -mem_get_be_aligned_generic(32) - -#undef mem_get_le16_aligned -#define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) -mem_get_le_aligned_generic(16) - -#undef mem_get_le32_aligned -#define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) -mem_get_le_aligned_generic(32) - -#undef mem_get_sbe16_aligned -#define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) -mem_get_sbe_aligned_generic(16) - -#undef mem_get_sbe32_aligned -#define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) -mem_get_sbe_aligned_generic(32) - -#undef mem_get_sle16_aligned -#define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) -mem_get_sle_aligned_generic(16) - -#undef mem_get_sle32_aligned -#define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) -mem_get_sle_aligned_generic(32) - -#undef mem_put_be16_aligned -#define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) -mem_put_be_aligned_generic(16) - -#undef mem_put_be32_aligned -#define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) -mem_put_be_aligned_generic(32) - -#undef mem_put_le16_aligned -#define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) -mem_put_le_aligned_generic(16) - -#undef mem_put_le32_aligned -#define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) -mem_put_le_aligned_generic(32) - -#undef mem_get_ne_aligned_generic -#undef mem_get_se_aligned_generic -#undef mem_get_sne_aligned_generic -#undef mem_get_sse_aligned_generic -#undef mem_put_ne_aligned_generic -#undef mem_put_se_aligned_generic -#undef swap_endian_16 -#undef swap_endian_32 -#undef swap_endian_16_se -#undef swap_endian_32_se - -#endif // VPX_PORTS_MEM_OPS_ALIGNED_H_ diff --git a/thirdparty/libvpx/vpx_ports/msvc.h b/thirdparty/libvpx/vpx_ports/msvc.h deleted file mode 100644 index cab77405f4..0000000000 --- a/thirdparty/libvpx/vpx_ports/msvc.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_MSVC_H_ -#define VPX_PORTS_MSVC_H_ -#ifdef _MSC_VER - -#include "./vpx_config.h" - -# if _MSC_VER < 1900 // VS2015 provides snprintf -# define snprintf _snprintf -# endif // _MSC_VER < 1900 - -#if _MSC_VER < 1800 // VS2013 provides round -#include -static INLINE double round(double x) { - if (x < 0) - return ceil(x - 0.5); - else - return floor(x + 0.5); -} -#endif // _MSC_VER < 1800 - -#endif // _MSC_VER -#endif // VPX_PORTS_MSVC_H_ diff --git a/thirdparty/libvpx/vpx_ports/system_state.h b/thirdparty/libvpx/vpx_ports/system_state.h deleted file mode 100644 index 01989dcafc..0000000000 --- a/thirdparty/libvpx/vpx_ports/system_state.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_SYSTEM_STATE_H_ -#define VPX_PORTS_SYSTEM_STATE_H_ - -#include "./vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - void vpx_reset_mmx_state(void); - #define vpx_clear_system_state() vpx_reset_mmx_state() -#else - #define vpx_clear_system_state() -#endif // ARCH_X86 || ARCH_X86_64 -#endif // VPX_PORTS_SYSTEM_STATE_H_ diff --git a/thirdparty/libvpx/vpx_ports/vpx_once.h b/thirdparty/libvpx/vpx_ports/vpx_once.h deleted file mode 100644 index da04db4590..0000000000 --- a/thirdparty/libvpx/vpx_ports/vpx_once.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_VPX_ONCE_H_ -#define VPX_PORTS_VPX_ONCE_H_ - -#include "vpx_config.h" - -/* Implement a function wrapper to guarantee initialization - * thread-safety for library singletons. - * - * NOTE: These functions use static locks, and can only be - * used with one common argument per compilation unit. So - * - * file1.c: - * vpx_once(foo); - * ... - * vpx_once(foo); - * - * file2.c: - * vpx_once(bar); - * - * will ensure foo() and bar() are each called only once, but in - * - * file1.c: - * vpx_once(foo); - * vpx_once(bar): - * - * bar() will never be called because the lock is used up - * by the call to foo(). - */ - -#if CONFIG_MULTITHREAD && defined(_WIN32) -#include -#include -/* Declare a per-compilation-unit state variable to track the progress - * of calling func() only once. This must be at global scope because - * local initializers are not thread-safe in MSVC prior to Visual - * Studio 2015. - * - * As a static, once_state will be zero-initialized as program start. - */ -static LONG once_state; -static void once(void (*func)(void)) -{ - /* Try to advance once_state from its initial value of 0 to 1. - * Only one thread can succeed in doing so. - */ - if (InterlockedCompareExchange(&once_state, 1, 0) == 0) { - /* We're the winning thread, having set once_state to 1. - * Call our function. */ - func(); - /* Now advance once_state to 2, unblocking any other threads. */ - InterlockedIncrement(&once_state); - return; - } - - /* We weren't the winning thread, but we want to block on - * the state variable so we don't return before func() - * has finished executing elsewhere. - * - * Try to advance once_state from 2 to 2, which is only possible - * after the winning thead advances it from 1 to 2. - */ - while (InterlockedCompareExchange(&once_state, 2, 2) != 2) { - /* State isn't yet 2. Try again. - * - * We are used for singleton initialization functions, - * which should complete quickly. Contention will likewise - * be rare, so it's worthwhile to use a simple but cpu- - * intensive busy-wait instead of successive backoff, - * waiting on a kernel object, or another heavier-weight scheme. - * - * We can at least yield our timeslice. - */ - Sleep(0); - } - - /* We've seen once_state advance to 2, so we know func() - * has been called. And we've left once_state as we found it, - * so other threads will have the same experience. - * - * It's safe to return now. - */ - return; -} - - -#elif CONFIG_MULTITHREAD && defined(__OS2__) -#define INCL_DOS -#include -static void once(void (*func)(void)) -{ - static int done; - - /* If the initialization is complete, return early. */ - if(done) - return; - - /* Causes all other threads in the process to block themselves - * and give up their time slice. - */ - DosEnterCritSec(); - - if (!done) - { - func(); - done = 1; - } - - /* Restores normal thread dispatching for the current process. */ - DosExitCritSec(); -} - - -#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H -#include -static void once(void (*func)(void)) -{ - static pthread_once_t lock = PTHREAD_ONCE_INIT; - pthread_once(&lock, func); -} - - -#else -/* No-op version that performs no synchronization. *_rtcd() is idempotent, - * so as long as your platform provides atomic loads/stores of pointers - * no synchronization is strictly necessary. - */ - -static void once(void (*func)(void)) -{ - static int done; - - if(!done) - { - func(); - done = 1; - } -} -#endif - -#endif // VPX_PORTS_VPX_ONCE_H_ diff --git a/thirdparty/libvpx/vpx_ports/vpx_timer.h b/thirdparty/libvpx/vpx_ports/vpx_timer.h deleted file mode 100644 index dd98e291c2..0000000000 --- a/thirdparty/libvpx/vpx_ports/vpx_timer.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_VPX_TIMER_H_ -#define VPX_PORTS_VPX_TIMER_H_ - -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" - -#if CONFIG_OS_SUPPORT - -#if defined(_WIN32) -/* - * Win32 specific includes - */ -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include -#else -/* - * POSIX specific includes - */ -#include - -/* timersub is not provided by msys at this time. */ -#ifndef timersub -#define timersub(a, b, result) \ - do { \ - (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((result)->tv_usec < 0) { \ - --(result)->tv_sec; \ - (result)->tv_usec += 1000000; \ - } \ - } while (0) -#endif -#endif - - -struct vpx_usec_timer { -#if defined(_WIN32) - LARGE_INTEGER begin, end; -#else - struct timeval begin, end; -#endif -}; - - -static INLINE void -vpx_usec_timer_start(struct vpx_usec_timer *t) { -#if defined(_WIN32) - QueryPerformanceCounter(&t->begin); -#else - gettimeofday(&t->begin, NULL); -#endif -} - - -static INLINE void -vpx_usec_timer_mark(struct vpx_usec_timer *t) { -#if defined(_WIN32) - QueryPerformanceCounter(&t->end); -#else - gettimeofday(&t->end, NULL); -#endif -} - - -static INLINE int64_t -vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { -#if defined(_WIN32) - LARGE_INTEGER freq, diff; - - diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; - - QueryPerformanceFrequency(&freq); - return diff.QuadPart * 1000000 / freq.QuadPart; -#else - struct timeval diff; - - timersub(&t->end, &t->begin, &diff); - return diff.tv_sec * 1000000 + diff.tv_usec; -#endif -} - -#else /* CONFIG_OS_SUPPORT = 0*/ - -/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */ -#ifndef timersub -#define timersub(a, b, result) -#endif - -struct vpx_usec_timer { - void *dummy; -}; - -static INLINE void -vpx_usec_timer_start(struct vpx_usec_timer *t) { } - -static INLINE void -vpx_usec_timer_mark(struct vpx_usec_timer *t) { } - -static INLINE int -vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { - return 0; -} - -#endif /* CONFIG_OS_SUPPORT */ - -#endif // VPX_PORTS_VPX_TIMER_H_ diff --git a/thirdparty/libvpx/vpx_ports/x86.h b/thirdparty/libvpx/vpx_ports/x86.h deleted file mode 100644 index bae25ac345..0000000000 --- a/thirdparty/libvpx/vpx_ports/x86.h +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_X86_H_ -#define VPX_PORTS_X86_H_ -#include - -#if defined(_MSC_VER) -#include /* For __cpuidex, __rdtsc */ -#endif - -#include "vpx_config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - VPX_CPU_UNKNOWN = -1, - VPX_CPU_AMD, - VPX_CPU_AMD_OLD, - VPX_CPU_CENTAUR, - VPX_CPU_CYRIX, - VPX_CPU_INTEL, - VPX_CPU_NEXGEN, - VPX_CPU_NSC, - VPX_CPU_RISE, - VPX_CPU_SIS, - VPX_CPU_TRANSMETA, - VPX_CPU_TRANSMETA_OLD, - VPX_CPU_UMC, - VPX_CPU_VIA, - - VPX_CPU_LAST -} vpx_cpu_t; - -#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) -#if ARCH_X86_64 -#define cpuid(func, func2, ax, bx, cx, dx)\ - __asm__ __volatile__ (\ - "cpuid \n\t" \ - : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#else -#define cpuid(func, func2, ax, bx, cx, dx)\ - __asm__ __volatile__ (\ - "mov %%ebx, %%edi \n\t" \ - "cpuid \n\t" \ - "xchg %%edi, %%ebx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#endif -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ -#if ARCH_X86_64 -#define cpuid(func, func2, ax, bx, cx, dx)\ - asm volatile (\ - "xchg %rsi, %rbx \n\t" \ - "cpuid \n\t" \ - "movl %ebx, %edi \n\t" \ - "xchg %rsi, %rbx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#else -#define cpuid(func, func2, ax, bx, cx, dx)\ - asm volatile (\ - "pushl %ebx \n\t" \ - "cpuid \n\t" \ - "movl %ebx, %edi \n\t" \ - "popl %ebx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#endif -#else /* end __SUNPRO__ */ -#if ARCH_X86_64 -#if defined(_MSC_VER) && _MSC_VER > 1500 -#define cpuid(func, func2, a, b, c, d) do {\ - int regs[4];\ - __cpuidex(regs, func, func2); \ - a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ - } while(0) -#else -#define cpuid(func, func2, a, b, c, d) do {\ - int regs[4];\ - __cpuid(regs, func); \ - a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ - } while (0) -#endif -#else -#define cpuid(func, func2, a, b, c, d)\ - __asm mov eax, func\ - __asm mov ecx, func2\ - __asm cpuid\ - __asm mov a, eax\ - __asm mov b, ebx\ - __asm mov c, ecx\ - __asm mov d, edx -#endif -#endif /* end others */ - -// NaCl has no support for xgetbv or the raw opcode. -#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) -static INLINE uint64_t xgetbv(void) { - const uint32_t ecx = 0; - uint32_t eax, edx; - // Use the raw opcode for xgetbv for compatibility with older toolchains. - __asm__ volatile ( - ".byte 0x0f, 0x01, 0xd0\n" - : "=a"(eax), "=d"(edx) : "c" (ecx)); - return ((uint64_t)edx << 32) | eax; -} -#elif (defined(_M_X64) || defined(_M_IX86)) && \ - defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 -#include -#define xgetbv() _xgetbv(0) -#elif defined(_MSC_VER) && defined(_M_IX86) -static INLINE uint64_t xgetbv(void) { - uint32_t eax_, edx_; - __asm { - xor ecx, ecx // ecx = 0 - // Use the raw opcode for xgetbv for compatibility with older toolchains. - __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 - mov eax_, eax - mov edx_, edx - } - return ((uint64_t)edx_ << 32) | eax_; -} -#else -#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. -#endif - -#if defined(_MSC_VER) && _MSC_VER >= 1700 -#include -#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) -#define getenv(x) NULL -#endif -#endif - -#define HAS_MMX 0x01 -#define HAS_SSE 0x02 -#define HAS_SSE2 0x04 -#define HAS_SSE3 0x08 -#define HAS_SSSE3 0x10 -#define HAS_SSE4_1 0x20 -#define HAS_AVX 0x40 -#define HAS_AVX2 0x80 -#ifndef BIT -#define BIT(n) (1<= 7) { - /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ - cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); - - if (reg_ebx & BIT(5)) flags |= HAS_AVX2; - } - } - } - - return flags & mask; -} - -// Note: -// 32-bit CPU cycle counter is light-weighted for most function performance -// measurement. For large function (CPU time > a couple of seconds), 64-bit -// counter should be used. -// 32-bit CPU cycle counter -static INLINE unsigned int -x86_readtsc(void) { -#if defined(__GNUC__) && __GNUC__ - unsigned int tsc; - __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); - return tsc; -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - unsigned int tsc; - asm volatile("rdtsc\n\t":"=a"(tsc):); - return tsc; -#else -#if ARCH_X86_64 - return (unsigned int)__rdtsc(); -#else - __asm rdtsc; -#endif -#endif -} -// 64-bit CPU cycle counter -static INLINE uint64_t -x86_readtsc64(void) { -#if defined(__GNUC__) && __GNUC__ - uint32_t hi, lo; - __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); - return ((uint64_t)hi << 32) | lo; -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - uint_t hi, lo; - asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); - return ((uint64_t)hi << 32) | lo; -#else -#if ARCH_X86_64 - return (uint64_t)__rdtsc(); -#else - __asm rdtsc; -#endif -#endif -} - -#if defined(__GNUC__) && __GNUC__ -#define x86_pause_hint()\ - __asm__ __volatile__ ("pause \n\t") -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -#define x86_pause_hint()\ - asm volatile ("pause \n\t") -#else -#if ARCH_X86_64 -#define x86_pause_hint()\ - _mm_pause(); -#else -#define x86_pause_hint()\ - __asm pause -#endif -#endif - -#if defined(__GNUC__) && __GNUC__ -static void -x87_set_control_word(unsigned short mode) { - __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); -} -static unsigned short -x87_get_control_word(void) { - unsigned short mode; - __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); - return mode; -} -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -static void -x87_set_control_word(unsigned short mode) { - asm volatile("fldcw %0" : : "m"(*&mode)); -} -static unsigned short -x87_get_control_word(void) { - unsigned short mode; - asm volatile("fstcw %0\n\t":"=m"(*&mode):); - return mode; -} -#elif ARCH_X86_64 -/* No fldcw intrinsics on Windows x64, punt to external asm */ -extern void vpx_winx64_fldcw(unsigned short mode); -extern unsigned short vpx_winx64_fstcw(void); -#define x87_set_control_word vpx_winx64_fldcw -#define x87_get_control_word vpx_winx64_fstcw -#else -static void -x87_set_control_word(unsigned short mode) { - __asm { fldcw mode } -} -static unsigned short -x87_get_control_word(void) { - unsigned short mode; - __asm { fstcw mode } - return mode; -} -#endif - -static INLINE unsigned int -x87_set_double_precision(void) { - unsigned int mode = x87_get_control_word(); - x87_set_control_word((mode&~0x300) | 0x200); - return mode; -} - - -extern void vpx_reset_mmx_state(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_PORTS_X86_H_ diff --git a/thirdparty/libvpx/vpx_ports/x86_abi_support.asm b/thirdparty/libvpx/vpx_ports/x86_abi_support.asm deleted file mode 100644 index 708fa101c5..0000000000 --- a/thirdparty/libvpx/vpx_ports/x86_abi_support.asm +++ /dev/null @@ -1,404 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_config.asm" - -; 32/64 bit compatibility macros -; -; In general, we make the source use 64 bit syntax, then twiddle with it using -; the preprocessor to get the 32 bit syntax on 32 bit platforms. -; -%ifidn __OUTPUT_FORMAT__,elf32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,macho32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,win32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,aout -%define ABI_IS_32BIT 1 -%else -%define ABI_IS_32BIT 0 -%endif - -%if ABI_IS_32BIT -%define rax eax -%define rbx ebx -%define rcx ecx -%define rdx edx -%define rsi esi -%define rdi edi -%define rsp esp -%define rbp ebp -%define movsxd mov -%macro movq 2 - %ifidn %1,eax - movd %1,%2 - %elifidn %2,eax - movd %1,%2 - %elifidn %1,ebx - movd %1,%2 - %elifidn %2,ebx - movd %1,%2 - %elifidn %1,ecx - movd %1,%2 - %elifidn %2,ecx - movd %1,%2 - %elifidn %1,edx - movd %1,%2 - %elifidn %2,edx - movd %1,%2 - %elifidn %1,esi - movd %1,%2 - %elifidn %2,esi - movd %1,%2 - %elifidn %1,edi - movd %1,%2 - %elifidn %2,edi - movd %1,%2 - %elifidn %1,esp - movd %1,%2 - %elifidn %2,esp - movd %1,%2 - %elifidn %1,ebp - movd %1,%2 - %elifidn %2,ebp - movd %1,%2 - %else - movq %1,%2 - %endif -%endmacro -%endif - - -; LIBVPX_YASM_WIN64 -; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64 -; or win64 is defined on the Yasm command line. -%ifidn __OUTPUT_FORMAT__,win64 -%define LIBVPX_YASM_WIN64 1 -%elifidn __OUTPUT_FORMAT__,x64 -%define LIBVPX_YASM_WIN64 1 -%else -%define LIBVPX_YASM_WIN64 0 -%endif - -; sym() -; Return the proper symbol name for the target ABI. -; -; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols -; with C linkage be prefixed with an underscore. -; -%ifidn __OUTPUT_FORMAT__,elf32 -%define sym(x) x -%elifidn __OUTPUT_FORMAT__,elf64 -%define sym(x) x -%elifidn __OUTPUT_FORMAT__,elfx32 -%define sym(x) x -%elif LIBVPX_YASM_WIN64 -%define sym(x) x -%else -%define sym(x) _ %+ x -%endif - -; PRIVATE -; Macro for the attribute to hide a global symbol for the target ABI. -; This is only active if CHROMIUM is defined. -; -; Chromium doesn't like exported global symbols due to symbol clashing with -; plugins among other things. -; -; Requires Chromium's patched copy of yasm: -; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 -; http://www.tortall.net/projects/yasm/ticket/236 -; -%ifdef CHROMIUM - %ifidn __OUTPUT_FORMAT__,elf32 - %define PRIVATE :hidden - %elifidn __OUTPUT_FORMAT__,elf64 - %define PRIVATE :hidden - %elifidn __OUTPUT_FORMAT__,elfx32 - %define PRIVATE :hidden - %elif LIBVPX_YASM_WIN64 - %define PRIVATE - %else - %define PRIVATE :private_extern - %endif -%else - %define PRIVATE -%endif - -; arg() -; Return the address specification of the given argument -; -%if ABI_IS_32BIT - %define arg(x) [ebp+8+4*x] -%else - ; 64 bit ABI passes arguments in registers. This is a workaround to get up - ; and running quickly. Relies on SHADOW_ARGS_TO_STACK - %if LIBVPX_YASM_WIN64 - %define arg(x) [rbp+16+8*x] - %else - %define arg(x) [rbp-8-8*x] - %endif -%endif - -; REG_SZ_BYTES, REG_SZ_BITS -; Size of a register -%if ABI_IS_32BIT -%define REG_SZ_BYTES 4 -%define REG_SZ_BITS 32 -%else -%define REG_SZ_BYTES 8 -%define REG_SZ_BITS 64 -%endif - - -; ALIGN_STACK -; This macro aligns the stack to the given alignment (in bytes). The stack -; is left such that the previous value of the stack pointer is the first -; argument on the stack (ie, the inverse of this macro is 'pop rsp.') -; This macro uses one temporary register, which is not preserved, and thus -; must be specified as an argument. -%macro ALIGN_STACK 2 - mov %2, rsp - and rsp, -%1 - lea rsp, [rsp - (%1 - REG_SZ_BYTES)] - push %2 -%endmacro - - -; -; The Microsoft assembler tries to impose a certain amount of type safety in -; its register usage. YASM doesn't recognize these directives, so we just -; %define them away to maintain as much compatibility as possible with the -; original inline assembler we're porting from. -; -%idefine PTR -%idefine XMMWORD -%idefine MMWORD - -; PIC macros -; -%if ABI_IS_32BIT - %if CONFIG_PIC=1 - %ifidn __OUTPUT_FORMAT__,elf32 - %define WRT_PLT wrt ..plt - %macro GET_GOT 1 - extern _GLOBAL_OFFSET_TABLE_ - push %1 - call %%get_got - %%sub_offset: - jmp %%exitGG - %%get_got: - mov %1, [esp] - add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc - ret - %%exitGG: - %undef GLOBAL - %define GLOBAL(x) x + %1 wrt ..gotoff - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %elifidn __OUTPUT_FORMAT__,macho32 - %macro GET_GOT 1 - push %1 - call %%get_got - %%get_got: - pop %1 - %undef GLOBAL - %define GLOBAL(x) x + %1 - %%get_got - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %endif - %endif - - %ifdef CHROMIUM - %ifidn __OUTPUT_FORMAT__,macho32 - %define HIDDEN_DATA(x) x:private_extern - %else - %define HIDDEN_DATA(x) x - %endif - %else - %define HIDDEN_DATA(x) x - %endif -%else - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) rel x - %ifidn __OUTPUT_FORMAT__,elf64 - %define WRT_PLT wrt ..plt - %define HIDDEN_DATA(x) x:data hidden - %elifidn __OUTPUT_FORMAT__,elfx32 - %define WRT_PLT wrt ..plt - %define HIDDEN_DATA(x) x:data hidden - %elifidn __OUTPUT_FORMAT__,macho64 - %ifdef CHROMIUM - %define HIDDEN_DATA(x) x:private_extern - %else - %define HIDDEN_DATA(x) x - %endif - %else - %define HIDDEN_DATA(x) x - %endif -%endif -%ifnmacro GET_GOT - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) x -%endif -%ifndef RESTORE_GOT -%define RESTORE_GOT -%endif -%ifndef WRT_PLT -%define WRT_PLT -%endif - -%if ABI_IS_32BIT - %macro SHADOW_ARGS_TO_STACK 1 - %endm - %define UNSHADOW_ARGS -%else -%if LIBVPX_YASM_WIN64 - %macro SHADOW_ARGS_TO_STACK 1 ; argc - %if %1 > 0 - mov arg(0),rcx - %endif - %if %1 > 1 - mov arg(1),rdx - %endif - %if %1 > 2 - mov arg(2),r8 - %endif - %if %1 > 3 - mov arg(3),r9 - %endif - %endm -%else - %macro SHADOW_ARGS_TO_STACK 1 ; argc - %if %1 > 0 - push rdi - %endif - %if %1 > 1 - push rsi - %endif - %if %1 > 2 - push rdx - %endif - %if %1 > 3 - push rcx - %endif - %if %1 > 4 - push r8 - %endif - %if %1 > 5 - push r9 - %endif - %if %1 > 6 - %assign i %1-6 - %assign off 16 - %rep i - mov rax,[rbp+off] - push rax - %assign off off+8 - %endrep - %endif - %endm -%endif - %define UNSHADOW_ARGS mov rsp, rbp -%endif - -; Win64 ABI requires that XMM6:XMM15 are callee saved -; SAVE_XMM n, [u] -; store registers 6-n on the stack -; if u is specified, use unaligned movs. -; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return -; value. Typically we follow this up with 'push rbp' - re-aligning the stack - -; but in some cases this is not done and unaligned movs must be used. -%if LIBVPX_YASM_WIN64 -%macro SAVE_XMM 1-2 a - %if %1 < 6 - %error Only xmm registers 6-15 must be preserved - %else - %assign last_xmm %1 - %define movxmm movdq %+ %2 - %assign xmm_stack_space ((last_xmm - 5) * 16) - sub rsp, xmm_stack_space - %assign i 6 - %rep (last_xmm - 5) - movxmm [rsp + ((i - 6) * 16)], xmm %+ i - %assign i i+1 - %endrep - %endif -%endmacro -%macro RESTORE_XMM 0 - %ifndef last_xmm - %error RESTORE_XMM must be paired with SAVE_XMM n - %else - %assign i last_xmm - %rep (last_xmm - 5) - movxmm xmm %+ i, [rsp +((i - 6) * 16)] - %assign i i-1 - %endrep - add rsp, xmm_stack_space - ; there are a couple functions which return from multiple places. - ; otherwise, we could uncomment these: - ; %undef last_xmm - ; %undef xmm_stack_space - ; %undef movxmm - %endif -%endmacro -%else -%macro SAVE_XMM 1-2 -%endmacro -%macro RESTORE_XMM 0 -%endmacro -%endif - -; Name of the rodata section -; -; .rodata seems to be an elf-ism, as it doesn't work on OSX. -; -%ifidn __OUTPUT_FORMAT__,macho64 -%define SECTION_RODATA section .text -%elifidn __OUTPUT_FORMAT__,macho32 -%macro SECTION_RODATA 0 -section .text -%endmacro -%elifidn __OUTPUT_FORMAT__,aout -%define SECTION_RODATA section .data -%else -%define SECTION_RODATA section .rodata -%endif - - -; Tell GNU ld that we don't require an executable stack. -%ifidn __OUTPUT_FORMAT__,elf32 -section .note.GNU-stack noalloc noexec nowrite progbits -section .text -%elifidn __OUTPUT_FORMAT__,elf64 -section .note.GNU-stack noalloc noexec nowrite progbits -section .text -%elifidn __OUTPUT_FORMAT__,elfx32 -section .note.GNU-stack noalloc noexec nowrite progbits -section .text -%endif - -; On Android platforms use lrand48 when building postproc routines. Prior to L -; rand() was not available. -%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1 -%ifdef __ANDROID__ -extern sym(lrand48) -%define LIBVPX_RAND lrand48 -%else -extern sym(rand) -%define LIBVPX_RAND rand -%endif -%endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC diff --git a/thirdparty/libvpx/vpx_scale/generic/yv12config.c b/thirdparty/libvpx/vpx_scale/generic/yv12config.c deleted file mode 100644 index 6bbb6d8d48..0000000000 --- a/thirdparty/libvpx/vpx_scale/generic/yv12config.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "vpx_scale/yv12config.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -/**************************************************************************** -* Exports -****************************************************************************/ - -/**************************************************************************** - * - ****************************************************************************/ -#define yv12_align_addr(addr, align) \ - (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) - -int -vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { - if (ybf) { - // If libvpx is using frame buffer callbacks then buffer_alloc_sz must - // not be set. - if (ybf->buffer_alloc_sz > 0) { - vpx_free(ybf->buffer_alloc); - } - - /* buffer_alloc isn't accessed by most functions. Rather y_buffer, - u_buffer and v_buffer point to buffer_alloc and are used. Clear out - all of this so that a freed pointer isn't inadvertently used */ - memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); - } else { - return -1; - } - - return 0; -} - -int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border) { - if (ybf) { - int aligned_width = (width + 15) & ~15; - int aligned_height = (height + 15) & ~15; - int y_stride = ((aligned_width + 2 * border) + 31) & ~31; - int yplane_size = (aligned_height + 2 * border) * y_stride; - int uv_width = aligned_width >> 1; - int uv_height = aligned_height >> 1; - /** There is currently a bunch of code which assumes - * uv_stride == y_stride/2, so enforce this here. */ - int uv_stride = y_stride >> 1; - int uvplane_size = (uv_height + border) * uv_stride; - const int frame_size = yplane_size + 2 * uvplane_size; - - if (!ybf->buffer_alloc) { - ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size); - ybf->buffer_alloc_sz = frame_size; - } - - if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) - return -1; - - /* Only support allocating buffers that have a border that's a multiple - * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without introducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). */ - if (border & 0x1f) - return -3; - - ybf->y_crop_width = width; - ybf->y_crop_height = height; - ybf->y_width = aligned_width; - ybf->y_height = aligned_height; - ybf->y_stride = y_stride; - - ybf->uv_crop_width = (width + 1) / 2; - ybf->uv_crop_height = (height + 1) / 2; - ybf->uv_width = uv_width; - ybf->uv_height = uv_height; - ybf->uv_stride = uv_stride; - - ybf->alpha_width = 0; - ybf->alpha_height = 0; - ybf->alpha_stride = 0; - - ybf->border = border; - ybf->frame_size = frame_size; - - ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; - ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; - ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; - ybf->alpha_buffer = NULL; - - ybf->corrupted = 0; /* assume not currupted by errors */ - return 0; - } - return -2; -} - -int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border) { - if (ybf) { - vp8_yv12_de_alloc_frame_buffer(ybf); - return vp8_yv12_realloc_frame_buffer(ybf, width, height, border); - } - return -2; -} - -#if CONFIG_VP9 -// TODO(jkoleszar): Maybe replace this with struct vpx_image - -int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { - if (ybf) { - if (ybf->buffer_alloc_sz > 0) { - vpx_free(ybf->buffer_alloc); - } - - /* buffer_alloc isn't accessed by most functions. Rather y_buffer, - u_buffer and v_buffer point to buffer_alloc and are used. Clear out - all of this so that a freed pointer isn't inadvertently used */ - memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); - } else { - return -1; - } - - return 0; -} - -int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, - int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, - int byte_alignment, - vpx_codec_frame_buffer_t *fb, - vpx_get_frame_buffer_cb_fn_t cb, - void *cb_priv) { - if (ybf) { - const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment; - const int aligned_width = (width + 7) & ~7; - const int aligned_height = (height + 7) & ~7; - const int y_stride = ((aligned_width + 2 * border) + 31) & ~31; - const uint64_t yplane_size = (aligned_height + 2 * border) * - (uint64_t)y_stride + byte_alignment; - const int uv_width = aligned_width >> ss_x; - const int uv_height = aligned_height >> ss_y; - const int uv_stride = y_stride >> ss_x; - const int uv_border_w = border >> ss_x; - const int uv_border_h = border >> ss_y; - const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) * - (uint64_t)uv_stride + byte_alignment; - -#if CONFIG_VP9_HIGHBITDEPTH - const uint64_t frame_size = - (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size); -#else - const uint64_t frame_size = yplane_size + 2 * uvplane_size; -#endif // CONFIG_VP9_HIGHBITDEPTH - - uint8_t *buf = NULL; - - if (cb != NULL) { - const int align_addr_extra_size = 31; - const uint64_t external_frame_size = frame_size + align_addr_extra_size; - - assert(fb != NULL); - - if (external_frame_size != (size_t)external_frame_size) - return -1; - - // Allocation to hold larger frame, or first allocation. - if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) - return -1; - - if (fb->data == NULL || fb->size < external_frame_size) - return -1; - - ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32); - -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) - // This memset is needed for fixing the issue of using uninitialized - // value in msan test. It will cause a perf loss, so only do this for - // msan test. - memset(ybf->buffer_alloc, 0, (int)frame_size); -#endif -#endif - } else if (frame_size > (size_t)ybf->buffer_alloc_sz) { - // Allocation to hold larger frame, or first allocation. - vpx_free(ybf->buffer_alloc); - ybf->buffer_alloc = NULL; - - if (frame_size != (size_t)frame_size) - return -1; - - ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size); - if (!ybf->buffer_alloc) - return -1; - - ybf->buffer_alloc_sz = (int)frame_size; - - // This memset is needed for fixing valgrind error from C loop filter - // due to access uninitialized memory in frame border. It could be - // removed if border is totally removed. - memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); - } - - /* Only support allocating buffers that have a border that's a multiple - * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without introducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). */ - if (border & 0x1f) - return -3; - - ybf->y_crop_width = width; - ybf->y_crop_height = height; - ybf->y_width = aligned_width; - ybf->y_height = aligned_height; - ybf->y_stride = y_stride; - - ybf->uv_crop_width = (width + ss_x) >> ss_x; - ybf->uv_crop_height = (height + ss_y) >> ss_y; - ybf->uv_width = uv_width; - ybf->uv_height = uv_height; - ybf->uv_stride = uv_stride; - - ybf->border = border; - ybf->frame_size = (int)frame_size; - ybf->subsampling_x = ss_x; - ybf->subsampling_y = ss_y; - - buf = ybf->buffer_alloc; -#if CONFIG_VP9_HIGHBITDEPTH - if (use_highbitdepth) { - // Store uint16 addresses when using 16bit framebuffers - buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc); - ybf->flags = YV12_FLAG_HIGHBITDEPTH; - } else { - ybf->flags = 0; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - ybf->y_buffer = (uint8_t *)yv12_align_addr( - buf + (border * y_stride) + border, vp9_byte_align); - ybf->u_buffer = (uint8_t *)yv12_align_addr( - buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w, - vp9_byte_align); - ybf->v_buffer = (uint8_t *)yv12_align_addr( - buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) + - uv_border_w, vp9_byte_align); - - ybf->corrupted = 0; /* assume not corrupted by errors */ - return 0; - } - return -2; -} - -int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, - int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, - int byte_alignment) { - if (ybf) { - vpx_free_frame_buffer(ybf); - return vpx_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border, byte_alignment, NULL, NULL, NULL); - } - return -2; -} -#endif diff --git a/thirdparty/libvpx/vpx_scale/generic/yv12extend.c b/thirdparty/libvpx/vpx_scale/generic/yv12extend.c deleted file mode 100644 index 52f0aff1f2..0000000000 --- a/thirdparty/libvpx/vpx_scale/generic/yv12extend.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include "./vpx_config.h" -#include "./vpx_scale_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_scale/yv12config.h" -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif - -static void extend_plane(uint8_t *const src, int src_stride, - int width, int height, - int extend_top, int extend_left, - int extend_bottom, int extend_right) { - int i; - const int linesize = extend_left + extend_right + width; - - /* copy the left and right most columns out */ - uint8_t *src_ptr1 = src; - uint8_t *src_ptr2 = src + width - 1; - uint8_t *dst_ptr1 = src - extend_left; - uint8_t *dst_ptr2 = src + width; - - for (i = 0; i < height; ++i) { - memset(dst_ptr1, src_ptr1[0], extend_left); - memset(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_stride; - src_ptr2 += src_stride; - dst_ptr1 += src_stride; - dst_ptr2 += src_stride; - } - - /* Now copy the top and bottom lines into each line of the respective - * borders - */ - src_ptr1 = src - extend_left; - src_ptr2 = src + src_stride * (height - 1) - extend_left; - dst_ptr1 = src + src_stride * -extend_top - extend_left; - dst_ptr2 = src + src_stride * height - extend_left; - - for (i = 0; i < extend_top; ++i) { - memcpy(dst_ptr1, src_ptr1, linesize); - dst_ptr1 += src_stride; - } - - for (i = 0; i < extend_bottom; ++i) { - memcpy(dst_ptr2, src_ptr2, linesize); - dst_ptr2 += src_stride; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void extend_plane_high(uint8_t *const src8, int src_stride, - int width, int height, - int extend_top, int extend_left, - int extend_bottom, int extend_right) { - int i; - const int linesize = extend_left + extend_right + width; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - - /* copy the left and right most columns out */ - uint16_t *src_ptr1 = src; - uint16_t *src_ptr2 = src + width - 1; - uint16_t *dst_ptr1 = src - extend_left; - uint16_t *dst_ptr2 = src + width; - - for (i = 0; i < height; ++i) { - vpx_memset16(dst_ptr1, src_ptr1[0], extend_left); - vpx_memset16(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_stride; - src_ptr2 += src_stride; - dst_ptr1 += src_stride; - dst_ptr2 += src_stride; - } - - /* Now copy the top and bottom lines into each line of the respective - * borders - */ - src_ptr1 = src - extend_left; - src_ptr2 = src + src_stride * (height - 1) - extend_left; - dst_ptr1 = src + src_stride * -extend_top - extend_left; - dst_ptr2 = src + src_stride * height - extend_left; - - for (i = 0; i < extend_top; ++i) { - memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); - dst_ptr1 += src_stride; - } - - for (i = 0; i < extend_bottom; ++i) { - memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); - dst_ptr2 += src_stride; - } -} -#endif - -void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { - const int uv_border = ybf->border / 2; - - assert(ybf->border % 2 == 0); - assert(ybf->y_height - ybf->y_crop_height < 16); - assert(ybf->y_width - ybf->y_crop_width < 16); - assert(ybf->y_height - ybf->y_crop_height >= 0); - assert(ybf->y_width - ybf->y_crop_width >= 0); - -#if CONFIG_VP9_HIGHBITDEPTH - if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { - extend_plane_high( - ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ybf->border, ybf->border, - ybf->border + ybf->y_height - ybf->y_crop_height, - ybf->border + ybf->y_width - ybf->y_crop_width); - - extend_plane_high( - ybf->u_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); - - extend_plane_high( - ybf->v_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); - return; - } -#endif - extend_plane(ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ybf->border, ybf->border, - ybf->border + ybf->y_height - ybf->y_crop_height, - ybf->border + ybf->y_width - ybf->y_crop_width); - - extend_plane(ybf->u_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); - - extend_plane(ybf->v_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); -} - -#if CONFIG_VP9 -static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) { - const int c_w = ybf->uv_crop_width; - const int c_h = ybf->uv_crop_height; - const int ss_x = ybf->uv_width < ybf->y_width; - const int ss_y = ybf->uv_height < ybf->y_height; - const int c_et = ext_size >> ss_y; - const int c_el = ext_size >> ss_x; - const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height; - const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width; - - assert(ybf->y_height - ybf->y_crop_height < 16); - assert(ybf->y_width - ybf->y_crop_width < 16); - assert(ybf->y_height - ybf->y_crop_height >= 0); - assert(ybf->y_width - ybf->y_crop_width >= 0); - -#if CONFIG_VP9_HIGHBITDEPTH - if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { - extend_plane_high(ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ext_size, ext_size, - ext_size + ybf->y_height - ybf->y_crop_height, - ext_size + ybf->y_width - ybf->y_crop_width); - extend_plane_high(ybf->u_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); - extend_plane_high(ybf->v_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); - return; - } -#endif - extend_plane(ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ext_size, ext_size, - ext_size + ybf->y_height - ybf->y_crop_height, - ext_size + ybf->y_width - ybf->y_crop_width); - - extend_plane(ybf->u_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); - - extend_plane(ybf->v_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); -} - -void vpx_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { - extend_frame(ybf, ybf->border); -} - -void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) { - const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ? - VP9INNERBORDERINPIXELS : ybf->border; - extend_frame(ybf, inner_bw); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - memcpy(dst, src, num * sizeof(uint16_t)); -} -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_VP9 - -// Copies the source image into the destination image and updates the -// destination's UMV borders. -// Note: The frames are assumed to be identical in size. -void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc) { - int row; - const uint8_t *src = src_ybc->y_buffer; - uint8_t *dst = dst_ybc->y_buffer; - -#if 0 - /* These assertions are valid in the codec, but the libvpx-tester uses - * this code slightly differently. - */ - assert(src_ybc->y_width == dst_ybc->y_width); - assert(src_ybc->y_height == dst_ybc->y_height); -#endif - -#if CONFIG_VP9_HIGHBITDEPTH - if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { - assert(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH); - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy_short_addr(dst, src, src_ybc->y_width); - src += src_ybc->y_stride; - dst += dst_ybc->y_stride; - } - - src = src_ybc->u_buffer; - dst = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy_short_addr(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - src = src_ybc->v_buffer; - dst = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy_short_addr(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders_c(dst_ybc); - return; - } else { - assert(!(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH)); - } -#endif - - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy(dst, src, src_ybc->y_width); - src += src_ybc->y_stride; - dst += dst_ybc->y_stride; - } - - src = src_ybc->u_buffer; - dst = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - src = src_ybc->v_buffer; - dst = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders_c(dst_ybc); -} - -void vpx_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc) { - int row; - const uint8_t *src = src_ybc->y_buffer; - uint8_t *dst = dst_ybc->y_buffer; - -#if CONFIG_VP9_HIGHBITDEPTH - if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); - uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t)); - src16 += src_ybc->y_stride; - dst16 += dst_ybc->y_stride; - } - return; - } -#endif - - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy(dst, src, src_ybc->y_width); - src += src_ybc->y_stride; - dst += dst_ybc->y_stride; - } -} diff --git a/thirdparty/libvpx/vpx_scale/vpx_scale.h b/thirdparty/libvpx/vpx_scale/vpx_scale.h deleted file mode 100644 index 43fcf9d66e..0000000000 --- a/thirdparty/libvpx/vpx_scale/vpx_scale.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_SCALE_VPX_SCALE_H_ -#define VPX_SCALE_VPX_SCALE_H_ - -#include "vpx_scale/yv12config.h" - -extern void vpx_scale_frame(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced); - -#endif // VPX_SCALE_VPX_SCALE_H_ diff --git a/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c b/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c deleted file mode 100644 index bea603fd10..0000000000 --- a/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vpx_scale_rtcd.h" -#include "vpx_ports/vpx_once.h" - -void vpx_scale_rtcd() -{ - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vpx_scale/yv12config.h b/thirdparty/libvpx/vpx_scale/yv12config.h deleted file mode 100644 index 37b255d4d3..0000000000 --- a/thirdparty/libvpx/vpx_scale/yv12config.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_SCALE_YV12CONFIG_H_ -#define VPX_SCALE_YV12CONFIG_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_config.h" -#include "vpx/vpx_codec.h" -#include "vpx/vpx_frame_buffer.h" -#include "vpx/vpx_integer.h" - -#define VP8BORDERINPIXELS 32 -#define VP9INNERBORDERINPIXELS 96 -#define VP9_INTERP_EXTEND 4 -#define VP9_ENC_BORDER_IN_PIXELS 160 -#define VP9_DEC_BORDER_IN_PIXELS 32 - -typedef struct yv12_buffer_config { - int y_width; - int y_height; - int y_crop_width; - int y_crop_height; - int y_stride; - - int uv_width; - int uv_height; - int uv_crop_width; - int uv_crop_height; - int uv_stride; - - int alpha_width; - int alpha_height; - int alpha_stride; - - uint8_t *y_buffer; - uint8_t *u_buffer; - uint8_t *v_buffer; - uint8_t *alpha_buffer; - - uint8_t *buffer_alloc; - int buffer_alloc_sz; - int border; - int frame_size; - int subsampling_x; - int subsampling_y; - unsigned int bit_depth; - vpx_color_space_t color_space; - vpx_color_range_t color_range; - int render_width; - int render_height; - - int corrupted; - int flags; -} YV12_BUFFER_CONFIG; - -#define YV12_FLAG_HIGHBITDEPTH 8 - -int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border); -int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border); -int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); - -int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, int byte_alignment); - -// Updates the yv12 buffer config with the frame buffer. |byte_alignment| must -// be a power of 2, from 32 to 1024. 0 sets legacy alignment. If cb is not -// NULL, then libvpx is using the frame buffer callbacks to handle memory. -// If cb is not NULL, libvpx will call cb with minimum size in bytes needed -// to decode the current frame. If cb is NULL, libvpx will allocate memory -// internally to decode the current frame. Returns 0 on success. Returns < 0 -// on failure. -int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, - int byte_alignment, - vpx_codec_frame_buffer_t *fb, - vpx_get_frame_buffer_cb_fn_t cb, - void *cb_priv); -int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf); - -#ifdef __cplusplus -} -#endif - -#endif // VPX_SCALE_YV12CONFIG_H_ diff --git a/thirdparty/libvpx/vpx_scale_rtcd.h b/thirdparty/libvpx/vpx_scale_rtcd.h deleted file mode 100644 index 4d59467fe9..0000000000 --- a/thirdparty/libvpx/vpx_scale_rtcd.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef VPX_SCALE_RTCD_H_ -#define VPX_SCALE_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -struct yv12_buffer_config; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c - -void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf); -#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c - -void vpx_extend_frame_borders_c(struct yv12_buffer_config *ybf); -#define vpx_extend_frame_borders vpx_extend_frame_borders_c - -void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); -#define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c - -void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -#define vpx_yv12_copy_y vpx_yv12_copy_y_c - -void vpx_scale_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ - //Only MIPS has something here, but it is not supported -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/vpx_util/endian_inl.h b/thirdparty/libvpx/vpx_util/endian_inl.h deleted file mode 100644 index 37bdce1ccd..0000000000 --- a/thirdparty/libvpx/vpx_util/endian_inl.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2014 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Endian related functions. - -#ifndef VPX_UTIL_ENDIAN_INL_H_ -#define VPX_UTIL_ENDIAN_INL_H_ - -#include -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -#if defined(__GNUC__) -# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) -# define LOCAL_GCC_PREREQ(maj, min) \ - (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_GCC_VERSION 0 -# define LOCAL_GCC_PREREQ(maj, min) 0 -#endif - -// handle clang compatibility -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -#if !defined(WORDS_BIGENDIAN) && \ - (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -#define WORDS_BIGENDIAN -#endif - -#if defined(WORDS_BIGENDIAN) -#define HToLE32 BSwap32 -#define HToLE16 BSwap16 -#define HToBE64(x) (x) -#define HToBE32(x) (x) -#else -#define HToLE32(x) (x) -#define HToLE16(x) (x) -#define HToBE64(X) BSwap64(X) -#define HToBE32(X) BSwap32(X) -#endif - -#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) -#define HAVE_BUILTIN_BSWAP16 -#endif - -#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) -#define HAVE_BUILTIN_BSWAP32 -#endif - -#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) -#define HAVE_BUILTIN_BSWAP64 -#endif - -#if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \ - defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6) -#define VPX_USE_MIPS32_R2 -#endif - -static INLINE uint16_t BSwap16(uint16_t x) { -#if defined(HAVE_BUILTIN_BSWAP16) - return __builtin_bswap16(x); -#elif defined(_MSC_VER) - return _byteswap_ushort(x); -#else - // gcc will recognize a 'rorw $8, ...' here: - return (x >> 8) | ((x & 0xff) << 8); -#endif // HAVE_BUILTIN_BSWAP16 -} - -static INLINE uint32_t BSwap32(uint32_t x) { -#if defined(VPX_USE_MIPS32_R2) - uint32_t ret; - __asm__ volatile ( - "wsbh %[ret], %[x] \n\t" - "rotr %[ret], %[ret], 16 \n\t" - : [ret]"=r"(ret) - : [x]"r"(x) - ); - return ret; -#elif defined(HAVE_BUILTIN_BSWAP32) - return __builtin_bswap32(x); -#elif defined(__i386__) || defined(__x86_64__) - uint32_t swapped_bytes; - __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); - return swapped_bytes; -#elif defined(_MSC_VER) - return (uint32_t)_byteswap_ulong(x); -#else - return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); -#endif // HAVE_BUILTIN_BSWAP32 -} - -static INLINE uint64_t BSwap64(uint64_t x) { -#if defined(HAVE_BUILTIN_BSWAP64) - return __builtin_bswap64(x); -#elif defined(__x86_64__) - uint64_t swapped_bytes; - __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); - return swapped_bytes; -#elif defined(_MSC_VER) - return (uint64_t)_byteswap_uint64(x); -#else // generic code for swapping 64-bit values (suggested by bdb@) - x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); - x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); - x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); - return x; -#endif // HAVE_BUILTIN_BSWAP64 -} - -#endif // VPX_UTIL_ENDIAN_INL_H_ diff --git a/thirdparty/libvpx/vpx_util/vpx_thread.c b/thirdparty/libvpx/vpx_util/vpx_thread.c deleted file mode 100644 index 0bb0125bd4..0000000000 --- a/thirdparty/libvpx/vpx_util/vpx_thread.c +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 264210ba2807e4da47eb5d18c04cf869d89b9784 src/utils/thread.c - -#include -#include // for memset() -#include "./vpx_thread.h" -#include "vpx_mem/vpx_mem.h" - -#if CONFIG_MULTITHREAD - -struct VPxWorkerImpl { - pthread_mutex_t mutex_; - pthread_cond_t condition_; - pthread_t thread_; -}; - -//------------------------------------------------------------------------------ - -static void execute(VPxWorker *const worker); // Forward declaration. - -static THREADFN thread_loop(void *ptr) { - VPxWorker *const worker = (VPxWorker*)ptr; - int done = 0; - while (!done) { - pthread_mutex_lock(&worker->impl_->mutex_); - while (worker->status_ == OK) { // wait in idling mode - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - if (worker->status_ == WORK) { - execute(worker); - worker->status_ = OK; - } else if (worker->status_ == NOT_OK) { // finish the worker - done = 1; - } - // signal to the main thread that we're done (for sync()) - pthread_cond_signal(&worker->impl_->condition_); - pthread_mutex_unlock(&worker->impl_->mutex_); - } - return THREAD_RETURN(NULL); // Thread is finished -} - -// main thread state control -static void change_state(VPxWorker *const worker, - VPxWorkerStatus new_status) { - // No-op when attempting to change state on a thread that didn't come up. - // Checking status_ without acquiring the lock first would result in a data - // race. - if (worker->impl_ == NULL) return; - - pthread_mutex_lock(&worker->impl_->mutex_); - if (worker->status_ >= OK) { - // wait for the worker to finish - while (worker->status_ != OK) { - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - // assign new status and release the working thread if needed - if (new_status != OK) { - worker->status_ = new_status; - pthread_cond_signal(&worker->impl_->condition_); - } - } - pthread_mutex_unlock(&worker->impl_->mutex_); -} - -#endif // CONFIG_MULTITHREAD - -//------------------------------------------------------------------------------ - -static void init(VPxWorker *const worker) { - memset(worker, 0, sizeof(*worker)); - worker->status_ = NOT_OK; -} - -static int sync(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, OK); -#endif - assert(worker->status_ <= OK); - return !worker->had_error; -} - -static int reset(VPxWorker *const worker) { - int ok = 1; - worker->had_error = 0; - if (worker->status_ < OK) { -#if CONFIG_MULTITHREAD - worker->impl_ = (VPxWorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_)); - if (worker->impl_ == NULL) { - return 0; - } - if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { - goto Error; - } - if (pthread_cond_init(&worker->impl_->condition_, NULL)) { - pthread_mutex_destroy(&worker->impl_->mutex_); - goto Error; - } - pthread_mutex_lock(&worker->impl_->mutex_); - ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); - if (ok) worker->status_ = OK; - pthread_mutex_unlock(&worker->impl_->mutex_); - if (!ok) { - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - Error: - vpx_free(worker->impl_); - worker->impl_ = NULL; - return 0; - } -#else - worker->status_ = OK; -#endif - } else if (worker->status_ > OK) { - ok = sync(worker); - } - assert(!ok || (worker->status_ == OK)); - return ok; -} - -static void execute(VPxWorker *const worker) { - if (worker->hook != NULL) { - worker->had_error |= !worker->hook(worker->data1, worker->data2); - } -} - -static void launch(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, WORK); -#else - execute(worker); -#endif -} - -static void end(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - if (worker->impl_ != NULL) { - change_state(worker, NOT_OK); - pthread_join(worker->impl_->thread_, NULL); - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - vpx_free(worker->impl_); - worker->impl_ = NULL; - } -#else - worker->status_ = NOT_OK; - assert(worker->impl_ == NULL); -#endif - assert(worker->status_ == NOT_OK); -} - -//------------------------------------------------------------------------------ - -static VPxWorkerInterface g_worker_interface = { - init, reset, sync, launch, execute, end -}; - -int vpx_set_worker_interface(const VPxWorkerInterface* const winterface) { - if (winterface == NULL || - winterface->init == NULL || winterface->reset == NULL || - winterface->sync == NULL || winterface->launch == NULL || - winterface->execute == NULL || winterface->end == NULL) { - return 0; - } - g_worker_interface = *winterface; - return 1; -} - -const VPxWorkerInterface *vpx_get_worker_interface(void) { - return &g_worker_interface; -} - -//------------------------------------------------------------------------------ diff --git a/thirdparty/libvpx/vpx_util/vpx_thread.h b/thirdparty/libvpx/vpx_util/vpx_thread.h deleted file mode 100644 index 2062abd75f..0000000000 --- a/thirdparty/libvpx/vpx_util/vpx_thread.h +++ /dev/null @@ -1,369 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h - -#ifndef VPX_THREAD_H_ -#define VPX_THREAD_H_ - -#include "./vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Set maximum decode threads to be 8 due to the limit of frame buffers -// and not enough semaphores in the emulation layer on windows. -#define MAX_DECODE_THREADS 8 - -#if CONFIG_MULTITHREAD - -#if defined(_WIN32) && !HAVE_PTHREAD_H -#include // NOLINT -#include // NOLINT -#include // NOLINT -typedef HANDLE pthread_t; -typedef CRITICAL_SECTION pthread_mutex_t; -typedef struct { - HANDLE waiting_sem_; - HANDLE received_sem_; - HANDLE signal_event_; -} pthread_cond_t; - -//------------------------------------------------------------------------------ -// simplistic pthread emulation layer - -// _beginthreadex requires __stdcall -#define THREADFN unsigned int __stdcall -#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) - -static INLINE int pthread_create(pthread_t* const thread, const void* attr, - unsigned int (__stdcall *start)(void*), - void* arg) { - (void)attr; - *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ - 0, /* unsigned stack_size */ - start, - arg, - 0, /* unsigned initflag */ - NULL); /* unsigned *thrdaddr */ - if (*thread == NULL) return 1; - SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); - return 0; -} - -static INLINE int pthread_join(pthread_t thread, void** value_ptr) { - (void)value_ptr; - return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || - CloseHandle(thread) == 0); -} - -// Mutex -static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, - void* mutexattr) { - (void)mutexattr; - InitializeCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { - return TryEnterCriticalSection(mutex) ? 0 : EBUSY; -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { - EnterCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { - LeaveCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { - DeleteCriticalSection(mutex); - return 0; -} - -// Condition -static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { - int ok = 1; - ok &= (CloseHandle(condition->waiting_sem_) != 0); - ok &= (CloseHandle(condition->received_sem_) != 0); - ok &= (CloseHandle(condition->signal_event_) != 0); - return !ok; -} - -static INLINE int pthread_cond_init(pthread_cond_t *const condition, - void* cond_attr) { - (void)cond_attr; - condition->waiting_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); - condition->received_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); - condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); - if (condition->waiting_sem_ == NULL || - condition->received_sem_ == NULL || - condition->signal_event_ == NULL) { - pthread_cond_destroy(condition); - return 1; - } - return 0; -} - -static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { - int ok = 1; - if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { - // a thread is waiting in pthread_cond_wait: allow it to be notified - ok = SetEvent(condition->signal_event_); - // wait until the event is consumed so the signaler cannot consume - // the event via its own pthread_cond_wait. - ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != - WAIT_OBJECT_0); - } - return !ok; -} - -static INLINE int pthread_cond_wait(pthread_cond_t *const condition, - pthread_mutex_t *const mutex) { - int ok; - // note that there is a consumer available so the signal isn't dropped in - // pthread_cond_signal - if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) - return 1; - // now unlock the mutex so pthread_cond_signal may be issued - pthread_mutex_unlock(mutex); - ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == - WAIT_OBJECT_0); - ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); - pthread_mutex_lock(mutex); - return !ok; -} -#elif defined(__OS2__) -#define INCL_DOS -#include // NOLINT - -#include // NOLINT -#include // NOLINT -#include // NOLINT - -#define pthread_t TID -#define pthread_mutex_t HMTX - -typedef struct { - HEV event_sem_; - HEV ack_sem_; - volatile unsigned wait_count_; -} pthread_cond_t; - -//------------------------------------------------------------------------------ -// simplistic pthread emulation layer - -#define THREADFN void * -#define THREAD_RETURN(val) (val) - -typedef struct { - void* (*start_)(void*); - void* arg_; -} thread_arg; - -static void thread_start(void* arg) { - thread_arg targ = *(thread_arg *)arg; - free(arg); - - targ.start_(targ.arg_); -} - -static INLINE int pthread_create(pthread_t* const thread, const void* attr, - void* (*start)(void*), - void* arg) { - int tid; - thread_arg *targ = (thread_arg *)malloc(sizeof(*targ)); - if (targ == NULL) return 1; - - (void)attr; - - targ->start_ = start; - targ->arg_ = arg; - tid = (pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ); - if (tid == -1) { - free(targ); - return 1; - } - - *thread = tid; - return 0; -} - -static INLINE int pthread_join(pthread_t thread, void** value_ptr) { - (void)value_ptr; - return DosWaitThread(&thread, DCWW_WAIT) != 0; -} - -// Mutex -static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, - void* mutexattr) { - (void)mutexattr; - return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0; -} - -static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { - return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY; -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { - return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0; -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { - return DosReleaseMutexSem(*mutex) != 0; -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { - return DosCloseMutexSem(*mutex) != 0; -} - -// Condition -static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { - int ok = 1; - ok &= DosCloseEventSem(condition->event_sem_) == 0; - ok &= DosCloseEventSem(condition->ack_sem_) == 0; - return !ok; -} - -static INLINE int pthread_cond_init(pthread_cond_t *const condition, - void* cond_attr) { - int ok = 1; - (void)cond_attr; - - ok &= DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE) - == 0; - ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0; - if (!ok) { - pthread_cond_destroy(condition); - return 1; - } - condition->wait_count_ = 0; - return 0; -} - -static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { - int ok = 1; - - if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) { - ok &= DosPostEventSem(condition->event_sem_) == 0; - ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0; - } - - return !ok; -} - -static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { - int ok = 1; - - while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) - ok &= pthread_cond_signal(condition) == 0; - - return !ok; -} - -static INLINE int pthread_cond_wait(pthread_cond_t *const condition, - pthread_mutex_t *const mutex) { - int ok = 1; - - __atomic_increment(&condition->wait_count_); - - ok &= pthread_mutex_unlock(mutex) == 0; - - ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0; - - __atomic_decrement(&condition->wait_count_); - - ok &= DosPostEventSem(condition->ack_sem_) == 0; - - pthread_mutex_lock(mutex); - - return !ok; -} -#else // _WIN32 -#include // NOLINT -# define THREADFN void* -# define THREAD_RETURN(val) val -#endif - -#endif // CONFIG_MULTITHREAD - -// State of the worker thread object -typedef enum { - NOT_OK = 0, // object is unusable - OK, // ready to work - WORK // busy finishing the current task -} VPxWorkerStatus; - -// Function to be called by the worker thread. Takes two opaque pointers as -// arguments (data1 and data2), and should return false in case of error. -typedef int (*VPxWorkerHook)(void*, void*); - -// Platform-dependent implementation details for the worker. -typedef struct VPxWorkerImpl VPxWorkerImpl; - -// Synchronization object used to launch job in the worker thread -typedef struct { - VPxWorkerImpl *impl_; - VPxWorkerStatus status_; - VPxWorkerHook hook; // hook to call - void *data1; // first argument passed to 'hook' - void *data2; // second argument passed to 'hook' - int had_error; // return value of the last call to 'hook' -} VPxWorker; - -// The interface for all thread-worker related functions. All these functions -// must be implemented. -typedef struct { - // Must be called first, before any other method. - void (*init)(VPxWorker *const worker); - // Must be called to initialize the object and spawn the thread. Re-entrant. - // Will potentially launch the thread. Returns false in case of error. - int (*reset)(VPxWorker *const worker); - // Makes sure the previous work is finished. Returns true if worker->had_error - // was not set and no error condition was triggered by the working thread. - int (*sync)(VPxWorker *const worker); - // Triggers the thread to call hook() with data1 and data2 arguments. These - // hook/data1/data2 values can be changed at any time before calling this - // function, but not be changed afterward until the next call to Sync(). - void (*launch)(VPxWorker *const worker); - // This function is similar to launch() except that it calls the - // hook directly instead of using a thread. Convenient to bypass the thread - // mechanism while still using the VPxWorker structs. sync() must - // still be called afterward (for error reporting). - void (*execute)(VPxWorker *const worker); - // Kill the thread and terminate the object. To use the object again, one - // must call reset() again. - void (*end)(VPxWorker *const worker); -} VPxWorkerInterface; - -// Install a new set of threading functions, overriding the defaults. This -// should be done before any workers are started, i.e., before any encoding or -// decoding takes place. The contents of the interface struct are copied, it -// is safe to free the corresponding memory after this call. This function is -// not thread-safe. Return false in case of invalid pointer or methods. -int vpx_set_worker_interface(const VPxWorkerInterface *const winterface); - -// Retrieve the currently set thread worker interface. -const VPxWorkerInterface *vpx_get_worker_interface(void); - -//------------------------------------------------------------------------------ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_THREAD_H_ diff --git a/thirdparty/libvpx/vpx_version.h b/thirdparty/libvpx/vpx_version.h deleted file mode 100644 index 5cff3b429f..0000000000 --- a/thirdparty/libvpx/vpx_version.h +++ /dev/null @@ -1,7 +0,0 @@ -#define VERSION_MAJOR 1 -#define VERSION_MINOR 6 -#define VERSION_PATCH 0 -#define VERSION_EXTRA "" -#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.6.0" -#define VERSION_STRING " v1.6.0" diff --git a/thirdparty/opus/COPYING b/thirdparty/opus/COPYING deleted file mode 100644 index 9c739c34a3..0000000000 --- a/thirdparty/opus/COPYING +++ /dev/null @@ -1,44 +0,0 @@ -Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic, - Jean-Marc Valin, Timothy B. Terriberry, - CSIRO, Gregory Maxwell, Mark Borgerding, - Erik de Castro Lopo - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -- Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Opus is subject to the royalty-free patent licenses which are -specified at: - -Xiph.Org Foundation: -https://datatracker.ietf.org/ipr/1524/ - -Microsoft Corporation: -https://datatracker.ietf.org/ipr/1914/ - -Broadcom Corporation: -https://datatracker.ietf.org/ipr/1526/ diff --git a/thirdparty/opus/analysis.c b/thirdparty/opus/analysis.c deleted file mode 100644 index 663431a436..0000000000 --- a/thirdparty/opus/analysis.c +++ /dev/null @@ -1,672 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "kiss_fft.h" -#include "celt.h" -#include "modes.h" -#include "arch.h" -#include "quant_bands.h" -#include -#include "analysis.h" -#include "mlp.h" -#include "stack_alloc.h" - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -static const float dct_table[128] = { - 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, - 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, - 0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f, - -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f, - 0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f, - -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f, - 0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f, - 0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f, - 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f, - 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f, - 0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f, - -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f, - 0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f, - -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f, - 0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f, - 0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f, -}; - -static const float analysis_window[240] = { - 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f, - 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f, - 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f, - 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f, - 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f, - 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f, - 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f, - 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f, - 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f, - 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f, - 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f, - 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f, - 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f, - 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f, - 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f, - 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f, - 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f, - 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f, - 0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f, - 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f, - 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f, - 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f, - 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f, - 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f, - 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f, - 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f, - 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f, - 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f, - 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f, - 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f, -}; - -static const int tbands[NB_TBANDS+1] = { - 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 -}; - -static const int extra_bands[NB_TOT_BANDS+1] = { - 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200 -}; - -/*static const float tweight[NB_TBANDS+1] = { - .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 -};*/ - -#define NB_TONAL_SKIP_BANDS 9 - -#define cA 0.43157974f -#define cB 0.67848403f -#define cC 0.08595542f -#define cE ((float)M_PI/2) -static OPUS_INLINE float fast_atan2f(float y, float x) { - float x2, y2; - /* Should avoid underflow on the values we'll get */ - if (ABS16(x)+ABS16(y)<1e-9f) - { - x*=1e12f; - y*=1e12f; - } - x2 = x*x; - y2 = y*y; - if(x2arch = opus_select_arch(); - /* Clear remaining fields. */ - tonality_analysis_reset(tonal); -} - -void tonality_analysis_reset(TonalityAnalysisState *tonal) -{ - /* Clear non-reusable fields. */ - char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START; - OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal)); -} - -void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len) -{ - int pos; - int curr_lookahead; - float psum; - int i; - - pos = tonal->read_pos; - curr_lookahead = tonal->write_pos-tonal->read_pos; - if (curr_lookahead<0) - curr_lookahead += DETECT_SIZE; - - if (len > 480 && pos != tonal->write_pos) - { - pos++; - if (pos==DETECT_SIZE) - pos=0; - } - if (pos == tonal->write_pos) - pos--; - if (pos<0) - pos = DETECT_SIZE-1; - OPUS_COPY(info_out, &tonal->info[pos], 1); - tonal->read_subframe += len/120; - while (tonal->read_subframe>=4) - { - tonal->read_subframe -= 4; - tonal->read_pos++; - } - if (tonal->read_pos>=DETECT_SIZE) - tonal->read_pos-=DETECT_SIZE; - - /* Compensate for the delay in the features themselves. - FIXME: Need a better estimate the 10 I just made up */ - curr_lookahead = IMAX(curr_lookahead-10, 0); - - psum=0; - /* Summing the probability of transition patterns that involve music at - time (DETECT_SIZE-curr_lookahead-1) */ - for (i=0;ipmusic[i]; - for (;ipspeech[i]; - psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; - /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ - - info_out->music_prob = psum; -} - -static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) -{ - int i, b; - const kiss_fft_state *kfft; - VARDECL(kiss_fft_cpx, in); - VARDECL(kiss_fft_cpx, out); - int N = 480, N2=240; - float * OPUS_RESTRICT A = tonal->angle; - float * OPUS_RESTRICT dA = tonal->d_angle; - float * OPUS_RESTRICT d2A = tonal->d2_angle; - VARDECL(float, tonality); - VARDECL(float, noisiness); - float band_tonality[NB_TBANDS]; - float logE[NB_TBANDS]; - float BFCC[8]; - float features[25]; - float frame_tonality; - float max_frame_tonality; - /*float tw_sum=0;*/ - float frame_noisiness; - const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI); - float slope=0; - float frame_stationarity; - float relativeE; - float frame_probs[2]; - float alpha, alphaE, alphaE2; - float frame_loudness; - float bandwidth_mask; - int bandwidth=0; - float maxE = 0; - float noise_floor; - int remaining; - AnalysisInfo *info; - SAVE_STACK; - - tonal->last_transition++; - alpha = 1.f/IMIN(20, 1+tonal->count); - alphaE = 1.f/IMIN(50, 1+tonal->count); - alphaE2 = 1.f/IMIN(1000, 1+tonal->count); - - if (tonal->count<4) - tonal->music_prob = .5; - kfft = celt_mode->mdct.kfft[0]; - if (tonal->count==0) - tonal->mem_fill = 240; - downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); - if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE) - { - tonal->mem_fill += len; - /* Don't have enough to update the analysis */ - RESTORE_STACK; - return; - } - info = &tonal->info[tonal->write_pos++]; - if (tonal->write_pos>=DETECT_SIZE) - tonal->write_pos-=DETECT_SIZE; - - ALLOC(in, 480, kiss_fft_cpx); - ALLOC(out, 480, kiss_fft_cpx); - ALLOC(tonality, 240, float); - ALLOC(noisiness, 240, float); - for (i=0;iinmem[i]); - in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); - in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); - in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); - } - OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); - remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); - downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); - tonal->mem_fill = 240 + remaining; - opus_fft(kfft, in, out, tonal->arch); -#ifndef FIXED_POINT - /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ - if (celt_isnan(out[0].r)) - { - info->valid = 0; - RESTORE_STACK; - return; - } -#endif - - for (i=1;iactivity = 0; - frame_noisiness = 0; - frame_stationarity = 0; - if (!tonal->count) - { - for (b=0;blowE[b] = 1e10; - tonal->highE[b] = -1e10; - } - } - relativeE = 0; - frame_loudness = 0; - for (b=0;bvalid = 0; - RESTORE_STACK; - return; - } -#endif - - tonal->E[tonal->E_count][b] = E; - frame_noisiness += nE/(1e-15f+E); - - frame_loudness += (float)sqrt(E+1e-10f); - logE[b] = (float)log(E+1e-10f); - tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); - tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); - if (tonal->highE[b] < tonal->lowE[b]+1.f) - { - tonal->highE[b]+=.5f; - tonal->lowE[b]-=.5f; - } - relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]); - - L1=L2=0; - for (i=0;iE[i][b]); - L2 += tonal->E[i][b]; - } - - stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2)); - stationarity *= stationarity; - stationarity *= stationarity; - frame_stationarity += stationarity; - /*band_tonality[b] = tE/(1e-15+E)*/; - band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]); -#if 0 - if (b>=NB_TONAL_SKIP_BANDS) - { - frame_tonality += tweight[b]*band_tonality[b]; - tw_sum += tweight[b]; - } -#else - frame_tonality += band_tonality[b]; - if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) - frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; -#endif - max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality); - slope += band_tonality[b]*(b-8); - /*printf("%f %f ", band_tonality[b], stationarity);*/ - tonal->prev_band_tonality[b] = band_tonality[b]; - } - - bandwidth_mask = 0; - bandwidth = 0; - maxE = 0; - noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); -#ifdef FIXED_POINT - noise_floor *= 1<<(15+SIG_SHIFT); -#endif - noise_floor *= noise_floor; - for (b=0;bmeanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E); - E = MAX32(E, tonal->meanE[b]); - /* Use a simple follower with 13 dB/Bark slope for spreading function */ - bandwidth_mask = MAX32(.05f*bandwidth_mask, E); - /* Consider the band "active" only if all these conditions are met: - 1) less than 10 dB below the simple follower - 2) less than 90 dB below the peak band (maximal masking possible considering - both the ATH and the loudness-dependent slope of the spreading function) - 3) above the PCM quantization noise floor - */ - if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start)) - bandwidth = b; - } - if (tonal->count<=2) - bandwidth = 20; - frame_loudness = 20*(float)log10(frame_loudness); - tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness); - tonal->lowECount *= (1-alphaE); - if (frame_loudness < tonal->Etracker-30) - tonal->lowECount += alphaE; - - for (i=0;i<8;i++) - { - float sum=0; - for (b=0;b<16;b++) - sum += dct_table[i*16+b]*logE[b]; - BFCC[i] = sum; - } - - frame_stationarity /= NB_TBANDS; - relativeE /= NB_TBANDS; - if (tonal->count<10) - relativeE = .5; - frame_noisiness /= NB_TBANDS; -#if 1 - info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; -#else - info->activity = .5*(1+frame_noisiness-frame_stationarity); -#endif - frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS)); - frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f); - tonal->prev_tonality = frame_tonality; - - slope /= 8*8; - info->tonality_slope = slope; - - tonal->E_count = (tonal->E_count+1)%NB_FRAMES; - tonal->count++; - info->tonality = frame_tonality; - - for (i=0;i<4;i++) - features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i]; - - for (i=0;i<4;i++) - tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i]; - - for (i=0;i<4;i++) - features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]); - for (i=0;i<3;i++) - features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8]; - - if (tonal->count > 5) - { - for (i=0;i<9;i++) - tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; - } - - for (i=0;i<8;i++) - { - tonal->mem[i+24] = tonal->mem[i+16]; - tonal->mem[i+16] = tonal->mem[i+8]; - tonal->mem[i+8] = tonal->mem[i]; - tonal->mem[i] = BFCC[i]; - } - for (i=0;i<9;i++) - features[11+i] = (float)sqrt(tonal->std[i]); - features[20] = info->tonality; - features[21] = info->activity; - features[22] = frame_stationarity; - features[23] = info->tonality_slope; - features[24] = tonal->lowECount; - -#ifndef DISABLE_FLOAT_API - mlp_process(&net, features, frame_probs); - frame_probs[0] = .5f*(frame_probs[0]+1); - /* Curve fitting between the MLP probability and the actual probability */ - frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10); - /* Probability of active audio (as opposed to silence) */ - frame_probs[1] = .5f*frame_probs[1]+.5f; - /* Consider that silence has a 50-50 probability. */ - frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f; - - /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/ - { - /* Probability of state transition */ - float tau; - /* Represents independence of the MLP probabilities, where - beta=1 means fully independent. */ - float beta; - /* Denormalized probability of speech (p0) and music (p1) after update */ - float p0, p1; - /* Probabilities for "all speech" and "all music" */ - float s0, m0; - /* Probability sum for renormalisation */ - float psum; - /* Instantaneous probability of speech and music, with beta pre-applied. */ - float speech0; - float music0; - float p, q; - - /* One transition every 3 minutes of active audio */ - tau = .00005f*frame_probs[1]; - /* Adapt beta based on how "unexpected" the new prob is */ - p = MAX16(.05f,MIN16(.95f,frame_probs[0])); - q = MAX16(.05f,MIN16(.95f,tonal->music_prob)); - beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p)); - /* p0 and p1 are the probabilities of speech and music at this frame - using only information from previous frame and applying the - state transition model */ - p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; - p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; - /* We apply the current probability with exponent beta to work around - the fact that the probability estimates aren't independent. */ - p0 *= (float)pow(1-frame_probs[0], beta); - p1 *= (float)pow(frame_probs[0], beta); - /* Normalise the probabilities to get the Marokv probability of music. */ - tonal->music_prob = p1/(p0+p1); - info->music_prob = tonal->music_prob; - - /* This chunk of code deals with delayed decision. */ - psum=1e-20f; - /* Instantaneous probability of speech and music, with beta pre-applied. */ - speech0 = (float)pow(1-frame_probs[0], beta); - music0 = (float)pow(frame_probs[0], beta); - if (tonal->count==1) - { - tonal->pspeech[0]=.5; - tonal->pmusic [0]=.5; - } - /* Updated probability of having only speech (s0) or only music (m0), - before considering the new observation. */ - s0 = tonal->pspeech[0] + tonal->pspeech[1]; - m0 = tonal->pmusic [0] + tonal->pmusic [1]; - /* Updates s0 and m0 with instantaneous probability. */ - tonal->pspeech[0] = s0*(1-tau)*speech0; - tonal->pmusic [0] = m0*(1-tau)*music0; - /* Propagate the transition probabilities */ - for (i=1;ipspeech[i] = tonal->pspeech[i+1]*speech0; - tonal->pmusic [i] = tonal->pmusic [i+1]*music0; - } - /* Probability that the latest frame is speech, when all the previous ones were music. */ - tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0; - /* Probability that the latest frame is music, when all the previous ones were speech. */ - tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0; - - /* Renormalise probabilities to 1 */ - for (i=0;ipspeech[i] + tonal->pmusic[i]; - psum = 1.f/psum; - for (i=0;ipspeech[i] *= psum; - tonal->pmusic [i] *= psum; - } - psum = tonal->pmusic[0]; - for (i=1;ipspeech[i]; - - /* Estimate our confidence in the speech/music decisions */ - if (frame_probs[1]>.75) - { - if (tonal->music_prob>.9) - { - float adapt; - adapt = 1.f/(++tonal->music_confidence_count); - tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500); - tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence); - } - if (tonal->music_prob<.1) - { - float adapt; - adapt = 1.f/(++tonal->speech_confidence_count); - tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500); - tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence); - } - } else { - if (tonal->music_confidence_count==0) - tonal->music_confidence = .9f; - if (tonal->speech_confidence_count==0) - tonal->speech_confidence = .1f; - } - } - if (tonal->last_music != (tonal->music_prob>.5f)) - tonal->last_transition=0; - tonal->last_music = tonal->music_prob>.5f; -#else - info->music_prob = 0; -#endif - /*for (i=0;i<25;i++) - printf("%f ", features[i]); - printf("\n");*/ - - info->bandwidth = bandwidth; - /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ - info->noisiness = frame_noisiness; - info->valid = 1; - RESTORE_STACK; -} - -void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, - int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) -{ - int offset; - int pcm_len; - - if (analysis_pcm != NULL) - { - /* Avoid overflow/wrap-around of the analysis buffer */ - analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); - - pcm_len = analysis_frame_size - analysis->analysis_offset; - offset = analysis->analysis_offset; - do { - tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); - offset += 480; - pcm_len -= 480; - } while (pcm_len>0); - analysis->analysis_offset = analysis_frame_size; - - analysis->analysis_offset -= frame_size; - } - - analysis_info->valid = 0; - tonality_get_info(analysis, analysis_info, frame_size); -} diff --git a/thirdparty/opus/analysis.h b/thirdparty/opus/analysis.h deleted file mode 100644 index 9eae56a525..0000000000 --- a/thirdparty/opus/analysis.h +++ /dev/null @@ -1,103 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef ANALYSIS_H -#define ANALYSIS_H - -#include "celt.h" -#include "opus_private.h" - -#define NB_FRAMES 8 -#define NB_TBANDS 18 -#define NB_TOT_BANDS 21 -#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */ - -#define DETECT_SIZE 200 - -typedef struct { - int arch; -#define TONALITY_ANALYSIS_RESET_START angle - float angle[240]; - float d_angle[240]; - float d2_angle[240]; - opus_val32 inmem[ANALYSIS_BUF_SIZE]; - int mem_fill; /* number of usable samples in the buffer */ - float prev_band_tonality[NB_TBANDS]; - float prev_tonality; - float E[NB_FRAMES][NB_TBANDS]; - float lowE[NB_TBANDS]; - float highE[NB_TBANDS]; - float meanE[NB_TOT_BANDS]; - float mem[32]; - float cmean[8]; - float std[9]; - float music_prob; - float Etracker; - float lowECount; - int E_count; - int last_music; - int last_transition; - int count; - float subframe_mem[3]; - int analysis_offset; - /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). - pspeech[0] is the probability that all frames in the window are speech. */ - float pspeech[DETECT_SIZE]; - /** Probability of having music for time i to DETECT_SIZE-1 (and speech before). - pmusic[0] is the probability that all frames in the window are music. */ - float pmusic[DETECT_SIZE]; - float speech_confidence; - float music_confidence; - int speech_confidence_count; - int music_confidence_count; - int write_pos; - int read_pos; - int read_subframe; - AnalysisInfo info[DETECT_SIZE]; -} TonalityAnalysisState; - -/** Initialize a TonalityAnalysisState struct. - * - * This performs some possibly slow initialization steps which should - * not be repeated every analysis step. No allocated memory is retained - * by the state struct, so no cleanup call is required. - */ -void tonality_analysis_init(TonalityAnalysisState *analysis); - -/** Reset a TonalityAnalysisState stuct. - * - * Call this when there's a discontinuity in the data. - */ -void tonality_analysis_reset(TonalityAnalysisState *analysis); - -void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); - -void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, - int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); - -#endif diff --git a/thirdparty/opus/celt/_kiss_fft_guts.h b/thirdparty/opus/celt/_kiss_fft_guts.h deleted file mode 100644 index 5e3d58fd66..0000000000 --- a/thirdparty/opus/celt/_kiss_fft_guts.h +++ /dev/null @@ -1,182 +0,0 @@ -/*Copyright (c) 2003-2004, Mark Borgerding - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_GUTS_H -#define KISS_FFT_GUTS_H - -#define MIN(a,b) ((a)<(b) ? (a):(b)) -#define MAX(a,b) ((a)>(b) ? (a):(b)) - -/* kiss_fft.h - defines kiss_fft_scalar as either short or a float type - and defines - typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ -#include "kiss_fft.h" - -/* - Explanation of macros dealing with complex math: - - C_MUL(m,a,b) : m = a*b - C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise - C_SUB( res, a,b) : res = a - b - C_SUBFROM( res , a) : res -= a - C_ADDTO( res , a) : res += a - * */ -#ifdef FIXED_POINT -#include "arch.h" - - -#define SAMP_MAX 2147483647 -#define TWID_MAX 32767 -#define TRIG_UPSCALE 1 - -#define SAMP_MIN -SAMP_MAX - - -# define S_MUL(a,b) MULT16_32_Q15(b, a) - -# define C_MUL(m,a,b) \ - do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ - (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) - -# define C_MULC(m,a,b) \ - do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ - (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) - -# define C_MULBYSCALAR( c, s ) \ - do{ (c).r = S_MUL( (c).r , s ) ;\ - (c).i = S_MUL( (c).i , s ) ; }while(0) - -# define DIVSCALAR(x,k) \ - (x) = S_MUL( x, (TWID_MAX-((k)>>1))/(k)+1 ) - -# define C_FIXDIV(c,div) \ - do { DIVSCALAR( (c).r , div); \ - DIVSCALAR( (c).i , div); }while (0) - -#define C_ADD( res, a,b)\ - do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ - }while(0) -#define C_SUB( res, a,b)\ - do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ - }while(0) -#define C_ADDTO( res , a)\ - do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ - }while(0) - -#define C_SUBFROM( res , a)\ - do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ - }while(0) - -#if defined(OPUS_ARM_INLINE_ASM) -#include "arm/kiss_fft_armv4.h" -#endif - -#if defined(OPUS_ARM_INLINE_EDSP) -#include "arm/kiss_fft_armv5e.h" -#endif -#if defined(MIPSr1_ASM) -#include "mips/kiss_fft_mipsr1.h" -#endif - -#else /* not FIXED_POINT*/ - -# define S_MUL(a,b) ( (a)*(b) ) -#define C_MUL(m,a,b) \ - do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ - (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) -#define C_MULC(m,a,b) \ - do{ (m).r = (a).r*(b).r + (a).i*(b).i;\ - (m).i = (a).i*(b).r - (a).r*(b).i; }while(0) - -#define C_MUL4(m,a,b) C_MUL(m,a,b) - -# define C_FIXDIV(c,div) /* NOOP */ -# define C_MULBYSCALAR( c, s ) \ - do{ (c).r *= (s);\ - (c).i *= (s); }while(0) -#endif - -#ifndef CHECK_OVERFLOW_OP -# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ -#endif - -#ifndef C_ADD -#define C_ADD( res, a,b)\ - do { \ - CHECK_OVERFLOW_OP((a).r,+,(b).r)\ - CHECK_OVERFLOW_OP((a).i,+,(b).i)\ - (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ - }while(0) -#define C_SUB( res, a,b)\ - do { \ - CHECK_OVERFLOW_OP((a).r,-,(b).r)\ - CHECK_OVERFLOW_OP((a).i,-,(b).i)\ - (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ - }while(0) -#define C_ADDTO( res , a)\ - do { \ - CHECK_OVERFLOW_OP((res).r,+,(a).r)\ - CHECK_OVERFLOW_OP((res).i,+,(a).i)\ - (res).r += (a).r; (res).i += (a).i;\ - }while(0) - -#define C_SUBFROM( res , a)\ - do {\ - CHECK_OVERFLOW_OP((res).r,-,(a).r)\ - CHECK_OVERFLOW_OP((res).i,-,(a).i)\ - (res).r -= (a).r; (res).i -= (a).i; \ - }while(0) -#endif /* C_ADD defined */ - -#ifdef FIXED_POINT -/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) -# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/ -# define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase)) -# define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase)) -# define HALF_OF(x) ((x)>>1) -#elif defined(USE_SIMD) -# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) -# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) -# define HALF_OF(x) ((x)*_mm_set1_ps(.5f)) -#else -# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) -# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) -# define HALF_OF(x) ((x)*.5f) -#endif - -#define kf_cexp(x,phase) \ - do{ \ - (x)->r = KISS_FFT_COS(phase);\ - (x)->i = KISS_FFT_SIN(phase);\ - }while(0) - -#define kf_cexp2(x,phase) \ - do{ \ - (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\ - (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\ -}while(0) - -#endif /* KISS_FFT_GUTS_H */ diff --git a/thirdparty/opus/celt/arch.h b/thirdparty/opus/celt/arch.h deleted file mode 100644 index 8ceab5fe10..0000000000 --- a/thirdparty/opus/celt/arch.h +++ /dev/null @@ -1,252 +0,0 @@ -/* Copyright (c) 2003-2008 Jean-Marc Valin - Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file arch.h - @brief Various architecture definitions for CELT -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef ARCH_H -#define ARCH_H - -#include "opus_types.h" -#include "opus_defines.h" - -# if !defined(__GNUC_PREREQ) -# if defined(__GNUC__)&&defined(__GNUC_MINOR__) -# define __GNUC_PREREQ(_maj,_min) \ - ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) -# else -# define __GNUC_PREREQ(_maj,_min) 0 -# endif -# endif - -#define CELT_SIG_SCALE 32768.f - -#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); -#ifdef ENABLE_ASSERTIONS -#include -#include -#ifdef __GNUC__ -__attribute__((noreturn)) -#endif -static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) -{ - fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); - abort(); -} -#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} -#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} -#else -#define celt_assert(cond) -#define celt_assert2(cond, message) -#endif - -#define IMUL32(a,b) ((a)*(b)) - -#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ -#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ -#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ -#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ -#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */ -#define UADD32(a,b) ((a)+(b)) -#define USUB32(a,b) ((a)-(b)) - -/* Set this if opus_int64 is a native type of the CPU. */ -/* Assume that all LP64 architectures have fast 64-bit types; also x86_64 - (which can be ILP32 for x32) and Win64 (which is LLP64). */ -#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) -#define OPUS_FAST_INT64 1 -#else -#define OPUS_FAST_INT64 0 -#endif - -#define PRINT_MIPS(file) - -#ifdef FIXED_POINT - -typedef opus_int16 opus_val16; -typedef opus_int32 opus_val32; - -typedef opus_val32 celt_sig; -typedef opus_val16 celt_norm; -typedef opus_val32 celt_ener; - -#define Q15ONE 32767 - -#define SIG_SHIFT 12 - -#define NORM_SCALING 16384 - -#define DB_SHIFT 10 - -#define EPSILON 1 -#define VERY_SMALL 0 -#define VERY_LARGE16 ((opus_val16)32767) -#define Q15_ONE ((opus_val16)32767) - -#define SCALEIN(a) (a) -#define SCALEOUT(a) (a) - -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) - -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; -} - -#ifdef FIXED_DEBUG -#include "fixed_debug.h" -#else - -#include "fixed_generic.h" - -#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR -#include "arm/fixed_arm64.h" -#elif OPUS_ARM_INLINE_EDSP -#include "arm/fixed_armv5e.h" -#elif defined (OPUS_ARM_INLINE_ASM) -#include "arm/fixed_armv4.h" -#elif defined (BFIN_ASM) -#include "fixed_bfin.h" -#elif defined (TI_C5X_ASM) -#include "fixed_c5x.h" -#elif defined (TI_C6X_ASM) -#include "fixed_c6x.h" -#endif - -#endif - -#else /* FIXED_POINT */ - -typedef float opus_val16; -typedef float opus_val32; - -typedef float celt_sig; -typedef float celt_norm; -typedef float celt_ener; - -#ifdef FLOAT_APPROX -/* This code should reliably detect NaN/inf even when -ffast-math is used. - Assumes IEEE 754 format. */ -static OPUS_INLINE int celt_isnan(float x) -{ - union {float f; opus_uint32 i;} in; - in.f = x; - return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; -} -#else -#ifdef __FAST_MATH__ -#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input -#endif -#define celt_isnan(x) ((x)!=(x)) -#endif - -#define Q15ONE 1.0f - -#define NORM_SCALING 1.f - -#define EPSILON 1e-15f -#define VERY_SMALL 1e-30f -#define VERY_LARGE16 1e15f -#define Q15_ONE ((opus_val16)1.f) - -/* This appears to be the same speed as C99's fabsf() but it's more portable. */ -#define ABS16(x) ((float)fabs(x)) -#define ABS32(x) ((float)fabs(x)) - -#define QCONST16(x,bits) (x) -#define QCONST32(x,bits) (x) - -#define NEG16(x) (-(x)) -#define NEG32(x) (-(x)) -#define EXTRACT16(x) (x) -#define EXTEND32(x) (x) -#define SHR16(a,shift) (a) -#define SHL16(a,shift) (a) -#define SHR32(a,shift) (a) -#define SHL32(a,shift) (a) -#define PSHR32(a,shift) (a) -#define VSHR32(a,shift) (a) - -#define PSHR(a,shift) (a) -#define SHR(a,shift) (a) -#define SHL(a,shift) (a) -#define SATURATE(x,a) (x) -#define SATURATE16(x) (x) - -#define ROUND16(a,shift) (a) -#define HALF16(x) (.5f*(x)) -#define HALF32(x) (.5f*(x)) - -#define ADD16(a,b) ((a)+(b)) -#define SUB16(a,b) ((a)-(b)) -#define ADD32(a,b) ((a)+(b)) -#define SUB32(a,b) ((a)-(b)) -#define MULT16_16_16(a,b) ((a)*(b)) -#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) -#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) - -#define MULT16_32_Q15(a,b) ((a)*(b)) -#define MULT16_32_Q16(a,b) ((a)*(b)) - -#define MULT32_32_Q31(a,b) ((a)*(b)) - -#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) -#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) - -#define MULT16_16_Q11_32(a,b) ((a)*(b)) -#define MULT16_16_Q11(a,b) ((a)*(b)) -#define MULT16_16_Q13(a,b) ((a)*(b)) -#define MULT16_16_Q14(a,b) ((a)*(b)) -#define MULT16_16_Q15(a,b) ((a)*(b)) -#define MULT16_16_P15(a,b) ((a)*(b)) -#define MULT16_16_P13(a,b) ((a)*(b)) -#define MULT16_16_P14(a,b) ((a)*(b)) -#define MULT16_32_P16(a,b) ((a)*(b)) - -#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b)) -#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b)) - -#define SCALEIN(a) ((a)*CELT_SIG_SCALE) -#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) - -#define SIG2WORD16(x) (x) - -#endif /* !FIXED_POINT */ - -#ifndef GLOBAL_STACK_SIZE -#ifdef FIXED_POINT -#define GLOBAL_STACK_SIZE 100000 -#else -#define GLOBAL_STACK_SIZE 100000 -#endif -#endif - -#endif /* ARCH_H */ diff --git a/thirdparty/opus/celt/arm/arm_celt_map.c b/thirdparty/opus/celt/arm/arm_celt_map.c deleted file mode 100644 index 4d4d069a86..0000000000 --- a/thirdparty/opus/celt/arm/arm_celt_map.c +++ /dev/null @@ -1,143 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pitch.h" -#include "kiss_fft.h" -#include "mdct.h" - -#if defined(OPUS_HAVE_RTCD) - -# if defined(FIXED_POINT) -# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ - (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ - (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) -opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int , int) = { - celt_pitch_xcorr_c, /* ARMv4 */ - MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */ - MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ - MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ -}; - -# endif -# else /* !FIXED_POINT */ -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) -void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int) = { - celt_pitch_xcorr_c, /* ARMv4 */ - celt_pitch_xcorr_c, /* EDSP */ - celt_pitch_xcorr_c, /* Media */ - celt_pitch_xcorr_float_neon /* Neon */ -}; -# endif -# endif /* FIXED_POINT */ - -#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \ - defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) - -void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len -) = { - xcorr_kernel_c, /* ARMv4 */ - xcorr_kernel_c, /* EDSP */ - xcorr_kernel_c, /* Media */ - xcorr_kernel_neon_fixed, /* Neon */ -}; - -#endif - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# if defined(HAVE_ARM_NE10) -# if defined(CUSTOM_MODES) -int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { - opus_fft_alloc_arch_c, /* ARMv4 */ - opus_fft_alloc_arch_c, /* EDSP */ - opus_fft_alloc_arch_c, /* Media */ - opus_fft_alloc_arm_neon /* Neon with NE10 library support */ -}; - -void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { - opus_fft_free_arch_c, /* ARMv4 */ - opus_fft_free_arch_c, /* EDSP */ - opus_fft_free_arch_c, /* Media */ - opus_fft_free_arm_neon /* Neon with NE10 */ -}; -# endif /* CUSTOM_MODES */ - -void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) = { - opus_fft_c, /* ARMv4 */ - opus_fft_c, /* EDSP */ - opus_fft_c, /* Media */ - opus_fft_neon /* Neon with NE10 */ -}; - -void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) = { - opus_ifft_c, /* ARMv4 */ - opus_ifft_c, /* EDSP */ - opus_ifft_c, /* Media */ - opus_ifft_neon /* Neon with NE10 */ -}; - -void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, - int overlap, int shift, - int stride, int arch) = { - clt_mdct_forward_c, /* ARMv4 */ - clt_mdct_forward_c, /* EDSP */ - clt_mdct_forward_c, /* Media */ - clt_mdct_forward_neon /* Neon with NE10 */ -}; - -void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, - int overlap, int shift, - int stride, int arch) = { - clt_mdct_backward_c, /* ARMv4 */ - clt_mdct_backward_c, /* EDSP */ - clt_mdct_backward_c, /* Media */ - clt_mdct_backward_neon /* Neon with NE10 */ -}; - -# endif /* HAVE_ARM_NE10 */ -# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ - -#endif /* OPUS_HAVE_RTCD */ diff --git a/thirdparty/opus/celt/arm/armcpu.c b/thirdparty/opus/celt/arm/armcpu.c deleted file mode 100644 index 694a63b78e..0000000000 --- a/thirdparty/opus/celt/arm/armcpu.c +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from libtheora modified to suit to Opus */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef OPUS_HAVE_RTCD - -#include "armcpu.h" -#include "cpu_support.h" -#include "os_support.h" -#include "opus_types.h" -#include "arch.h" - -#define OPUS_CPU_ARM_V4_FLAG (1< - -static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ - opus_uint32 flags; - flags=0; - /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit - * instructions via their assembled hex code. - * All of these instructions should be essentially nops. */ -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - __try{ - /*PLD [r13]*/ - __emit(0xF5DDF000); - flags|=OPUS_CPU_ARM_EDSP_FLAG; - } - __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ - /*Ignore exception.*/ - } -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - __try{ - /*SHADD8 r3,r3,r3*/ - __emit(0xE6333F93); - flags|=OPUS_CPU_ARM_MEDIA_FLAG; - } - __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ - /*Ignore exception.*/ - } -# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - __try{ - /*VORR q0,q0,q0*/ - __emit(0xF2200150); - flags|=OPUS_CPU_ARM_NEON_FLAG; - } - __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ - /*Ignore exception.*/ - } -# endif -# endif -# endif - return flags; -} - -#elif defined(__linux__) -/* Linux based */ -opus_uint32 opus_cpu_capabilities(void) -{ - opus_uint32 flags = 0; - FILE *cpuinfo; - - /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on - * Android */ - cpuinfo = fopen("/proc/cpuinfo", "r"); - - if(cpuinfo != NULL) - { - /* 512 should be enough for anybody (it's even enough for all the flags that - * x86 has accumulated... so far). */ - char buf[512]; - - while(fgets(buf, 512, cpuinfo) != NULL) - { -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - /* Search for edsp and neon flag */ - if(memcmp(buf, "Features", 8) == 0) - { - char *p; - p = strstr(buf, " edsp"); - if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_EDSP_FLAG; - -# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - p = strstr(buf, " neon"); - if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_NEON_FLAG; -# endif - } -# endif - -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - /* Search for media capabilities (>= ARMv6) */ - if(memcmp(buf, "CPU architecture:", 17) == 0) - { - int version; - version = atoi(buf+17); - - if(version >= 6) - flags |= OPUS_CPU_ARM_MEDIA_FLAG; - } -# endif - } - - fclose(cpuinfo); - } - return flags; -} -#else -/* The feature registers which can tell us what the processor supports are - * accessible in priveleged modes only, so we can't have a general user-space - * detection method like on x86.*/ -# error "Configured to use ARM asm but no CPU detection method available for " \ - "your platform. Reconfigure with --disable-rtcd (or send patches)." -#endif - -int opus_select_arch(void) -{ - opus_uint32 flags = opus_cpu_capabilities(); - int arch = 0; - - if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) { - /* Asserts ensure arch values are sequential */ - celt_assert(arch == OPUS_ARCH_ARM_V4); - return arch; - } - arch++; - - if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) { - celt_assert(arch == OPUS_ARCH_ARM_EDSP); - return arch; - } - arch++; - - if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) { - celt_assert(arch == OPUS_ARCH_ARM_MEDIA); - return arch; - } - arch++; - - celt_assert(arch == OPUS_ARCH_ARM_NEON); - return arch; -} - -#endif diff --git a/thirdparty/opus/celt/arm/armcpu.h b/thirdparty/opus/celt/arm/armcpu.h deleted file mode 100644 index 820262ff5f..0000000000 --- a/thirdparty/opus/celt/arm/armcpu.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(ARMCPU_H) -# define ARMCPU_H - -# if defined(OPUS_ARM_MAY_HAVE_EDSP) -# define MAY_HAVE_EDSP(name) name ## _edsp -# else -# define MAY_HAVE_EDSP(name) name ## _c -# endif - -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) -# define MAY_HAVE_MEDIA(name) name ## _media -# else -# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name) -# endif - -# if defined(OPUS_ARM_MAY_HAVE_NEON) -# define MAY_HAVE_NEON(name) name ## _neon -# else -# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) -# endif - -# if defined(OPUS_ARM_PRESUME_EDSP) -# define PRESUME_EDSP(name) name ## _edsp -# else -# define PRESUME_EDSP(name) name ## _c -# endif - -# if defined(OPUS_ARM_PRESUME_MEDIA) -# define PRESUME_MEDIA(name) name ## _media -# else -# define PRESUME_MEDIA(name) PRESUME_EDSP(name) -# endif - -# if defined(OPUS_ARM_PRESUME_NEON) -# define PRESUME_NEON(name) name ## _neon -# else -# define PRESUME_NEON(name) PRESUME_MEDIA(name) -# endif - -# if defined(OPUS_HAVE_RTCD) -int opus_select_arch(void); - -#define OPUS_ARCH_ARM_V4 (0) -#define OPUS_ARCH_ARM_EDSP (1) -#define OPUS_ARCH_ARM_MEDIA (2) -#define OPUS_ARCH_ARM_NEON (3) - -# endif - -#endif diff --git a/thirdparty/opus/celt/arm/armopts.s.in b/thirdparty/opus/celt/arm/armopts.s.in deleted file mode 100644 index 3d8aaf2754..0000000000 --- a/thirdparty/opus/celt/arm/armopts.s.in +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (C) 2013 Mozilla Corporation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -; Set the following to 1 if we have EDSP instructions -; (LDRD/STRD, etc., ARMv5E and later). -OPUS_ARM_MAY_HAVE_EDSP * @OPUS_ARM_MAY_HAVE_EDSP@ - -; Set the following to 1 if we have ARMv6 media instructions. -OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@ - -; Set the following to 1 if we have NEON (some ARMv7) -OPUS_ARM_MAY_HAVE_NEON * @OPUS_ARM_MAY_HAVE_NEON@ - -END diff --git a/thirdparty/opus/celt/arm/celt_ne10_fft.c b/thirdparty/opus/celt/arm/celt_ne10_fft.c deleted file mode 100644 index 42d96a7117..0000000000 --- a/thirdparty/opus/celt/arm/celt_ne10_fft.c +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file celt_ne10_fft.c - @brief ARM Neon optimizations for fft using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include -#include -#include "os_support.h" -#include "kiss_fft.h" -#include "stack_alloc.h" - -#if !defined(FIXED_POINT) -# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon -# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t -# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t -# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32 -# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t -# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon -#else -# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft) -# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t -# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t -# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 -# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 -# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t -# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon -#endif - -#if defined(CUSTOM_MODES) - -/* nfft lengths in NE10 that support scaled fft */ -# define NE10_FFTSCALED_SUPPORT_MAX 4 -static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { - 480, 240, 120, 60 -}; - -int opus_fft_alloc_arm_neon(kiss_fft_state *st) -{ - int i; - size_t memneeded = sizeof(struct arch_fft_state); - - st->arch_fft = (arch_fft_state *)opus_alloc(memneeded); - if (!st->arch_fft) - return -1; - - for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) { - if(st->nfft == ne10_fft_scaled_support[i]) - break; - } - if (i == NE10_FFTSCALED_SUPPORT_MAX) { - /* This nfft length (scaled fft) is not supported in NE10 */ - st->arch_fft->is_supported = 0; - st->arch_fft->priv = NULL; - } - else { - st->arch_fft->is_supported = 1; - st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft); - if (st->arch_fft->priv == NULL) { - return -1; - } - } - return 0; -} - -void opus_fft_free_arm_neon(kiss_fft_state *st) -{ - NE10_FFT_CFG_TYPE_T cfg; - - if (!st->arch_fft) - return; - - cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv; - if (cfg) - NE10_FFT_DESTROY_C2C_TYPE(cfg); - opus_free(st->arch_fft); -} -#endif - -void opus_fft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) -{ - NE10_FFT_STATE_TYPE_T state; - NE10_FFT_CFG_TYPE_T cfg = &state; - VARDECL(NE10_FFT_CPX_TYPE_T, buffer); - SAVE_STACK; - ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); - - if (!st->arch_fft->is_supported) { - /* This nfft length (scaled fft) not supported in NE10 */ - opus_fft_c(st, fin, fout); - } - else { - memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); - state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; -#if !defined(FIXED_POINT) - state.is_forward_scaled = 1; - - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 0); -#else - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 0, 1); -#endif - } - RESTORE_STACK; -} - -void opus_ifft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) -{ - NE10_FFT_STATE_TYPE_T state; - NE10_FFT_CFG_TYPE_T cfg = &state; - VARDECL(NE10_FFT_CPX_TYPE_T, buffer); - SAVE_STACK; - ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); - - if (!st->arch_fft->is_supported) { - /* This nfft length (scaled fft) not supported in NE10 */ - opus_ifft_c(st, fin, fout); - } - else { - memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); - state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; -#if !defined(FIXED_POINT) - state.is_backward_scaled = 0; - - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 1); -#else - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 1, 0); -#endif - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/celt/arm/celt_ne10_mdct.c b/thirdparty/opus/celt/arm/celt_ne10_mdct.c deleted file mode 100644 index 293c3efd7a..0000000000 --- a/thirdparty/opus/celt/arm/celt_ne10_mdct.c +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file celt_ne10_mdct.c - @brief ARM Neon optimizations for mdct using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include "kiss_fft.h" -#include "_kiss_fft_guts.h" -#include "mdct.h" -#include "stack_alloc.h" - -void clt_mdct_forward_neon(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, - int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_cpx, f2); - const kiss_fft_state *st = l->kfft[shift]; - const kiss_twiddle_scalar *trig; - - SAVE_STACK; - - N = l->n; - trig = l->trig; - for (i=0;i>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N4, kiss_fft_cpx); - - /* Consider the input to be composed of four blocks: [a, b, c, d] */ - /* Window, shuffle, fold */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); - const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<((overlap+3)>>2);i++) - { - /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); - *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - wp1 = window; - wp2 = window+overlap-1; - for(;i>2);i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = *xp2; - *yp++ = *xp1; - xp1+=2; - xp2-=2; - } - for(;ii,t[N4+i]) - S_MUL(fp->r,t[i]); - yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); - *yp1 = yr; - *yp2 = yi; - fp++; - yp1 += 2*stride; - yp2 -= 2*stride; - } - } - RESTORE_STACK; -} - -void clt_mdct_backward_neon(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, - int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - const kiss_twiddle_scalar *trig; - const kiss_fft_state *st = l->kfft[shift]; - - N = l->n; - trig = l->trig; - for (i=0;i>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - - /* Pre-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; - for(i=0;i>1)), arch); - - /* Post-rotate and de-shuffle from both ends of the buffer at once to make - it in-place. */ - { - kiss_fft_scalar * yp0 = out+(overlap>>1); - kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &trig[0]; - /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the - middle pair will be computed twice. */ - for(i=0;i<(N4+1)>>1;i++) - { - kiss_fft_scalar re, im, yr, yi; - kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i]; - t1 = t[N4+i]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - re = yp1[0]; - im = yp1[1]; - yp0[0] = yr; - yp1[1] = yi; - - t0 = t[(N4-i-1)]; - t1 = t[(N2-i-1)]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - yp1[0] = yr; - yp0[1] = yi; - yp0 += 2; - yp1 -= 2; - } - } - - /* Mirror on both sides for TDAC */ - { - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - - for(i = 0; i < overlap/2; i++) - { - kiss_fft_scalar x1, x2; - x1 = *xp1; - x2 = *yp1; - *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); - *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/celt/arm/celt_neon_intr.c b/thirdparty/opus/celt/arm/celt_neon_intr.c deleted file mode 100644 index 47bbe3dc22..0000000000 --- a/thirdparty/opus/celt/arm/celt_neon_intr.c +++ /dev/null @@ -1,311 +0,0 @@ -/* Copyright (c) 2014-2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file celt_neon_intr.c - @brief ARM Neon Intrinsic optimizations for celt - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "../pitch.h" - -#if defined(FIXED_POINT) -void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) -{ - int j; - int32x4_t a = vld1q_s32(sum); - /* Load y[0...3] */ - /* This requires len>0 to always be valid (which we assert in the C code). */ - int16x4_t y0 = vld1_s16(y); - y += 4; - - for (j = 0; j + 8 <= len; j += 8) - { - /* Load x[0...7] */ - int16x8_t xx = vld1q_s16(x); - int16x4_t x0 = vget_low_s16(xx); - int16x4_t x4 = vget_high_s16(xx); - /* Load y[4...11] */ - int16x8_t yy = vld1q_s16(y); - int16x4_t y4 = vget_low_s16(yy); - int16x4_t y8 = vget_high_s16(yy); - int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0); - int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0); - - int16x4_t y1 = vext_s16(y0, y4, 1); - int16x4_t y5 = vext_s16(y4, y8, 1); - int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1); - int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1); - - int16x4_t y2 = vext_s16(y0, y4, 2); - int16x4_t y6 = vext_s16(y4, y8, 2); - int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2); - int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2); - - int16x4_t y3 = vext_s16(y0, y4, 3); - int16x4_t y7 = vext_s16(y4, y8, 3); - int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3); - int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3); - - y0 = y8; - a = a7; - x += 8; - y += 8; - } - - for (; j < len; j++) - { - int16x4_t x0 = vld1_dup_s16(x); /* load next x */ - int32x4_t a0 = vmlal_s16(a, y0, x0); - - int16x4_t y4 = vld1_dup_s16(y); /* load next y */ - y0 = vext_s16(y0, y4, 1); - a = a0; - x++; - y++; - } - - vst1q_s32(sum, a); -} - -#else -/* - * Function: xcorr_kernel_neon_float - * --------------------------------- - * Computes 4 correlation values and stores them in sum[4] - */ -static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y, - float32_t sum[4], int len) { - float32x4_t YY[3]; - float32x4_t YEXT[3]; - float32x4_t XX[2]; - float32x2_t XX_2; - float32x4_t SUMM; - const float32_t *xi = x; - const float32_t *yi = y; - - celt_assert(len>0); - - YY[0] = vld1q_f32(yi); - SUMM = vdupq_n_f32(0); - - /* Consume 8 elements in x vector and 12 elements in y - * vector. However, the 12'th element never really gets - * touched in this loop. So, if len == 8, then we only - * must access y[0] to y[10]. y[11] must not be accessed - * hence make sure len > 8 and not len >= 8 - */ - while (len > 8) { - yi += 4; - YY[1] = vld1q_f32(yi); - yi += 4; - YY[2] = vld1q_f32(yi); - - XX[0] = vld1q_f32(xi); - xi += 4; - XX[1] = vld1q_f32(xi); - xi += 4; - - SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); - YEXT[0] = vextq_f32(YY[0], YY[1], 1); - SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); - YEXT[1] = vextq_f32(YY[0], YY[1], 2); - SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); - YEXT[2] = vextq_f32(YY[0], YY[1], 3); - SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); - - SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0); - YEXT[0] = vextq_f32(YY[1], YY[2], 1); - SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1); - YEXT[1] = vextq_f32(YY[1], YY[2], 2); - SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0); - YEXT[2] = vextq_f32(YY[1], YY[2], 3); - SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1); - - YY[0] = YY[2]; - len -= 8; - } - - /* Consume 4 elements in x vector and 8 elements in y - * vector. However, the 8'th element in y never really gets - * touched in this loop. So, if len == 4, then we only - * must access y[0] to y[6]. y[7] must not be accessed - * hence make sure len>4 and not len>=4 - */ - if (len > 4) { - yi += 4; - YY[1] = vld1q_f32(yi); - - XX[0] = vld1q_f32(xi); - xi += 4; - - SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); - YEXT[0] = vextq_f32(YY[0], YY[1], 1); - SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); - YEXT[1] = vextq_f32(YY[0], YY[1], 2); - SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); - YEXT[2] = vextq_f32(YY[0], YY[1], 3); - SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); - - YY[0] = YY[1]; - len -= 4; - } - - while (--len > 0) { - XX_2 = vld1_dup_f32(xi++); - SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); - YY[0]= vld1q_f32(++yi); - } - - XX_2 = vld1_dup_f32(xi); - SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); - - vst1q_f32(sum, SUMM); -} - -/* - * Function: xcorr_kernel_neon_float_process1 - * --------------------------------- - * Computes single correlation values and stores in *sum - */ -static void xcorr_kernel_neon_float_process1(const float32_t *x, - const float32_t *y, float32_t *sum, int len) { - float32x4_t XX[4]; - float32x4_t YY[4]; - float32x2_t XX_2; - float32x2_t YY_2; - float32x4_t SUMM; - float32x2_t SUMM_2[2]; - const float32_t *xi = x; - const float32_t *yi = y; - - SUMM = vdupq_n_f32(0); - - /* Work on 16 values per iteration */ - while (len >= 16) { - XX[0] = vld1q_f32(xi); - xi += 4; - XX[1] = vld1q_f32(xi); - xi += 4; - XX[2] = vld1q_f32(xi); - xi += 4; - XX[3] = vld1q_f32(xi); - xi += 4; - - YY[0] = vld1q_f32(yi); - yi += 4; - YY[1] = vld1q_f32(yi); - yi += 4; - YY[2] = vld1q_f32(yi); - yi += 4; - YY[3] = vld1q_f32(yi); - yi += 4; - - SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); - SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); - SUMM = vmlaq_f32(SUMM, YY[2], XX[2]); - SUMM = vmlaq_f32(SUMM, YY[3], XX[3]); - len -= 16; - } - - /* Work on 8 values */ - if (len >= 8) { - XX[0] = vld1q_f32(xi); - xi += 4; - XX[1] = vld1q_f32(xi); - xi += 4; - - YY[0] = vld1q_f32(yi); - yi += 4; - YY[1] = vld1q_f32(yi); - yi += 4; - - SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); - SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); - len -= 8; - } - - /* Work on 4 values */ - if (len >= 4) { - XX[0] = vld1q_f32(xi); - xi += 4; - YY[0] = vld1q_f32(yi); - yi += 4; - SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); - len -= 4; - } - - /* Start accumulating results */ - SUMM_2[0] = vget_low_f32(SUMM); - if (len >= 2) { - /* While at it, consume 2 more values if available */ - XX_2 = vld1_f32(xi); - xi += 2; - YY_2 = vld1_f32(yi); - yi += 2; - SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2); - len -= 2; - } - SUMM_2[1] = vget_high_f32(SUMM); - SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]); - SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]); - /* Ok, now we have result accumulated in SUMM_2[0].0 */ - - if (len > 0) { - /* Case when you have one value left */ - XX_2 = vld1_dup_f32(xi); - YY_2 = vld1_dup_f32(yi); - SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2); - } - - vst1_lane_f32(sum, SUMM_2[0], 0); -} - -void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch) { - int i; - celt_assert(max_pitch > 0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); - - for (i = 0; i < (max_pitch-3); i += 4) { - xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i, - (float32_t *)xcorr+i, len); - } - - /* In case max_pitch isn't multiple of 4 - * compute single correlation value per iteration - */ - for (; i < max_pitch; i++) { - xcorr_kernel_neon_float_process1((const float32_t *)_x, - (const float32_t *)_y+i, (float32_t *)xcorr+i, len); - } -} -#endif diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S deleted file mode 100644 index 5b2ee55a10..0000000000 --- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S +++ /dev/null @@ -1,551 +0,0 @@ - .syntax unified -@ Copyright (c) 2007-2008 CSIRO -@ Copyright (c) 2007-2009 Xiph.Org Foundation -@ Copyright (c) 2013 Parrot -@ Written by Aurélien Zanelli -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions -@ are met: -@ -@ - Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ -@ - Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - .text; .p2align 2; .arch armv7-a - .fpu neon - .object_arch armv4t - - .include "celt/arm/armopts-gnu.S" - - .if OPUS_ARM_MAY_HAVE_EDSP - .global celt_pitch_xcorr_edsp - .endif - - .if OPUS_ARM_MAY_HAVE_NEON - .global celt_pitch_xcorr_neon - .endif - - .if OPUS_ARM_MAY_HAVE_NEON - -@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 -; xcorr_kernel_neon: @ PROC -xcorr_kernel_neon_start: - @ input: - @ r3 = int len - @ r4 = opus_val16 *x - @ r5 = opus_val16 *y - @ q0 = opus_val32 sum[4] - @ output: - @ q0 = opus_val32 sum[4] - @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 - @ internal usage: - @ r12 = int j - @ d3 = y_3|y_2|y_1|y_0 - @ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 - @ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 - @ q8 = scratch - @ - @ Load y[0...3] - @ This requires len>0 to always be valid (which we assert in the C code). - VLD1.16 {d5}, [r5]! - SUBS r12, r3, #8 - BLE xcorr_kernel_neon_process4 -@ Process 8 samples at a time. -@ This loop loads one y value more than we actually need. Therefore we have to -@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid -@ reading past the end of the array. -xcorr_kernel_neon_process8: - @ This loop has 19 total instructions (10 cycles to issue, minimum), with - @ - 2 cycles of ARM insrtuctions, - @ - 10 cycles of load/store/byte permute instructions, and - @ - 9 cycles of data processing instructions. - @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the - @ latter two categories, meaning the whole loop should run in 10 cycles per - @ iteration, barring cache misses. - @ - @ Load x[0...7] - VLD1.16 {d6, d7}, [r4]! - @ Unlike VMOV, VAND is a data processsing instruction (and doesn't get - @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. - VAND d3, d5, d5 - SUBS r12, r12, #8 - @ Load y[4...11] - VLD1.16 {d4, d5}, [r5]! - VMLAL.S16 q0, d3, d6[0] - VEXT.16 d16, d3, d4, #1 - VMLAL.S16 q0, d4, d7[0] - VEXT.16 d17, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d3, d4, #2 - VMLAL.S16 q0, d17, d7[1] - VEXT.16 d17, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d3, d4, #3 - VMLAL.S16 q0, d17, d7[2] - VEXT.16 d17, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] - VMLAL.S16 q0, d17, d7[3] - BGT xcorr_kernel_neon_process8 -@ Process 4 samples here if we have > 4 left (still reading one extra y value). -xcorr_kernel_neon_process4: - ADDS r12, r12, #4 - BLE xcorr_kernel_neon_process2 - @ Load x[0...3] - VLD1.16 d6, [r4]! - @ Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #4 - @ Load y[4...7] - VLD1.16 d5, [r5]! - VMLAL.S16 q0, d4, d6[0] - VEXT.16 d16, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] -@ Process 2 samples here if we have > 2 left (still reading one extra y value). -xcorr_kernel_neon_process2: - ADDS r12, r12, #2 - BLE xcorr_kernel_neon_process1 - @ Load x[0...1] - VLD2.16 {d6[],d7[]}, [r4]! - @ Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #2 - @ Load y[4...5] - VLD1.32 {d5[]}, [r5]! - VMLAL.S16 q0, d4, d6 - VEXT.16 d16, d4, d5, #1 - @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI - @ instead of VEXT, since it's a data-processing instruction. - VSRI.64 d5, d4, #32 - VMLAL.S16 q0, d16, d7 -@ Process 1 sample using the extra y value we loaded above. -xcorr_kernel_neon_process1: - @ Load next *x - VLD1.16 {d6[]}, [r4]! - ADDS r12, r12, #1 - @ y[0...3] are left in d5 from prior iteration(s) (if any) - VMLAL.S16 q0, d5, d6 - MOVLE pc, lr -@ Now process 1 last sample, not reading ahead. - @ Load last *y - VLD1.16 {d4[]}, [r5]! - VSRI.64 d4, d5, #16 - @ Load last *x - VLD1.16 {d6[]}, [r4]! - VMLAL.S16 q0, d4, d6 - MOV pc, lr - .size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP - -@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, -@ opus_val32 *xcorr, int len, int max_pitch) -; celt_pitch_xcorr_neon: @ PROC - @ input: - @ r0 = opus_val16 *_x - @ r1 = opus_val16 *_y - @ r2 = opus_val32 *xcorr - @ r3 = int len - @ output: - @ r0 = int maxcorr - @ internal usage: - @ r4 = opus_val16 *x (for xcorr_kernel_neon()) - @ r5 = opus_val16 *y (for xcorr_kernel_neon()) - @ r6 = int max_pitch - @ r12 = int j - @ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) - STMFD sp!, {r4-r6, lr} - LDR r6, [sp, #16] - VMOV.S32 q15, #1 - @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - SUBS r6, r6, #4 - BLT celt_pitch_xcorr_neon_process4_done -celt_pitch_xcorr_neon_process4: - @ xcorr_kernel_neon parameters: - @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} - MOV r4, r0 - MOV r5, r1 - VEOR q0, q0, q0 - @ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. - @ So we don't save/restore any other registers. - BL xcorr_kernel_neon_start - SUBS r6, r6, #4 - VST1.32 {q0}, [r2]! - @ _y += 4 - ADD r1, r1, #8 - VMAX.S32 q15, q15, q0 - @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - BGE celt_pitch_xcorr_neon_process4 -@ We have less than 4 sums left to compute. -celt_pitch_xcorr_neon_process4_done: - ADDS r6, r6, #4 - @ Reduce maxcorr to a single value - VMAX.S32 d30, d30, d31 - VPMAX.S32 d30, d30, d30 - @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done - BLE celt_pitch_xcorr_neon_done -@ Now compute each remaining sum one at a time. -celt_pitch_xcorr_neon_process_remaining: - MOV r4, r0 - MOV r5, r1 - VMOV.I32 q0, #0 - SUBS r12, r3, #8 - BLT celt_pitch_xcorr_neon_process_remaining4 -@ Sum terms 8 at a time. -celt_pitch_xcorr_neon_process_remaining_loop8: - @ Load x[0...7] - VLD1.16 {q1}, [r4]! - @ Load y[0...7] - VLD1.16 {q2}, [r5]! - SUBS r12, r12, #8 - VMLAL.S16 q0, d4, d2 - VMLAL.S16 q0, d5, d3 - BGE celt_pitch_xcorr_neon_process_remaining_loop8 -@ Sum terms 4 at a time. -celt_pitch_xcorr_neon_process_remaining4: - ADDS r12, r12, #4 - BLT celt_pitch_xcorr_neon_process_remaining4_done - @ Load x[0...3] - VLD1.16 {d2}, [r4]! - @ Load y[0...3] - VLD1.16 {d3}, [r5]! - SUB r12, r12, #4 - VMLAL.S16 q0, d3, d2 -celt_pitch_xcorr_neon_process_remaining4_done: - @ Reduce the sum to a single value. - VADD.S32 d0, d0, d1 - VPADDL.S32 d0, d0 - ADDS r12, r12, #4 - BLE celt_pitch_xcorr_neon_process_remaining_loop_done -@ Sum terms 1 at a time. -celt_pitch_xcorr_neon_process_remaining_loop1: - VLD1.16 {d2[]}, [r4]! - VLD1.16 {d3[]}, [r5]! - SUBS r12, r12, #1 - VMLAL.S16 q0, d2, d3 - BGT celt_pitch_xcorr_neon_process_remaining_loop1 -celt_pitch_xcorr_neon_process_remaining_loop_done: - VST1.32 {d0[0]}, [r2]! - VMAX.S32 d30, d30, d0 - SUBS r6, r6, #1 - @ _y++ - ADD r1, r1, #2 - @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining - BGT celt_pitch_xcorr_neon_process_remaining -celt_pitch_xcorr_neon_done: - VMOV.32 r0, d30[0] - LDMFD sp!, {r4-r6, pc} - .size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP - - .endif - - .if OPUS_ARM_MAY_HAVE_EDSP - -@ This will get used on ARMv7 devices without NEON, so it has been optimized -@ to take advantage of dual-issuing where possible. -; xcorr_kernel_edsp: @ PROC -xcorr_kernel_edsp_start: - @ input: - @ r3 = int len - @ r4 = opus_val16 *_x (must be 32-bit aligned) - @ r5 = opus_val16 *_y (must be 32-bit aligned) - @ r6...r9 = opus_val32 sum[4] - @ output: - @ r6...r9 = opus_val32 sum[4] - @ preserved: r0-r5 - @ internal usage - @ r2 = int j - @ r12,r14 = opus_val16 x[4] - @ r10,r11 = opus_val16 y[4] - STMFD sp!, {r2,r4,r5,lr} - LDR r10, [r5], #4 @ Load y[0...1] - SUBS r2, r3, #4 @ j = len-4 - LDR r11, [r5], #4 @ Load y[2...3] - BLE xcorr_kernel_edsp_process4_done - LDR r12, [r4], #4 @ Load x[0...1] - @ Stall -xcorr_kernel_edsp_process4: - @ The multiplies must issue from pipeline 0, and can't dual-issue with each - @ other. Every other instruction here dual-issues with a multiply, and is - @ thus "free". There should be no stalls in the body of the loop. - SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0) - LDR r14, [r4], #4 @ Load x[2...3] - SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1) - SUBS r2, r2, #4 @ j-=4 - SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2) - SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3) - SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1) - LDR r10, [r5], #4 @ Load y[4...5] - SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2) - SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3) - SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4) - LDRGT r12, [r4], #4 @ Load x[0...1] - SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2) - SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3) - SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4) - SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5) - SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3) - LDR r11, [r5], #4 @ Load y[6...7] - SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4) - SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5) - SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6) - BGT xcorr_kernel_edsp_process4 -xcorr_kernel_edsp_process4_done: - ADDS r2, r2, #4 - BLE xcorr_kernel_edsp_done - LDRH r12, [r4], #2 @ r12 = *x++ - SUBS r2, r2, #1 @ j-- - @ Stall - SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0) - LDRHGT r14, [r4], #2 @ r14 = *x++ - SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1) - SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2) - SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1) - SUBS r2, r2, #1 @ j-- - SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2) - LDRH r10, [r5], #2 @ r10 = y_4 = *y++ - SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3) - LDRHGT r12, [r4], #2 @ r12 = *x++ - SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4) - BLE xcorr_kernel_edsp_done - SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2) - CMP r2, #1 @ j-- - SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3) - LDRH r2, [r5], #2 @ r2 = y_5 = *y++ - SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4) - LDRHGT r14, [r4] @ r14 = *x - SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3) - LDRH r11, [r5] @ r11 = y_6 = *y - SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4) - SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5) - SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6) -xcorr_kernel_edsp_done: - LDMFD sp!, {r2,r4,r5,pc} - .size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP - -; celt_pitch_xcorr_edsp: @ PROC - @ input: - @ r0 = opus_val16 *_x (must be 32-bit aligned) - @ r1 = opus_val16 *_y (only needs to be 16-bit aligned) - @ r2 = opus_val32 *xcorr - @ r3 = int len - @ output: - @ r0 = maxcorr - @ internal usage - @ r4 = opus_val16 *x - @ r5 = opus_val16 *y - @ r6 = opus_val32 sum0 - @ r7 = opus_val32 sum1 - @ r8 = opus_val32 sum2 - @ r9 = opus_val32 sum3 - @ r1 = int max_pitch - @ r12 = int j - STMFD sp!, {r4-r11, lr} - MOV r5, r1 - LDR r1, [sp, #36] - MOV r4, r0 - TST r5, #3 - @ maxcorr = 1 - MOV r0, #1 - BEQ celt_pitch_xcorr_edsp_process1u_done -@ Compute one sum at the start to make y 32-bit aligned. - SUBS r12, r3, #4 - @ r14 = sum = 0 - MOV r14, #0 - LDRH r8, [r5], #2 - BLE celt_pitch_xcorr_edsp_process1u_loop4_done - LDR r6, [r4], #4 - MOV r8, r8, LSL #16 -celt_pitch_xcorr_edsp_process1u_loop4: - LDR r9, [r5], #4 - SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) - LDR r7, [r4], #4 - SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1) - LDR r8, [r5], #4 - SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) - SUBS r12, r12, #4 @ j-=4 - SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3) - LDRGT r6, [r4], #4 - BGT celt_pitch_xcorr_edsp_process1u_loop4 - MOV r8, r8, LSR #16 -celt_pitch_xcorr_edsp_process1u_loop4_done: - ADDS r12, r12, #4 -celt_pitch_xcorr_edsp_process1u_loop1: - LDRHGE r6, [r4], #2 - @ Stall - SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) - SUBSGE r12, r12, #1 - LDRHGT r8, [r5], #2 - BGT celt_pitch_xcorr_edsp_process1u_loop1 - @ Restore _x - SUB r4, r4, r3, LSL #1 - @ Restore and advance _y - SUB r5, r5, r3, LSL #1 - @ maxcorr = max(maxcorr, sum) - CMP r0, r14 - ADD r5, r5, #2 - MOVLT r0, r14 - SUBS r1, r1, #1 - @ xcorr[i] = sum - STR r14, [r2], #4 - BLE celt_pitch_xcorr_edsp_done -celt_pitch_xcorr_edsp_process1u_done: - @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 - SUBS r1, r1, #4 - BLT celt_pitch_xcorr_edsp_process2 -celt_pitch_xcorr_edsp_process4: - @ xcorr_kernel_edsp parameters: - @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} - MOV r6, #0 - MOV r7, #0 - MOV r8, #0 - MOV r9, #0 - BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) - @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) - CMP r0, r6 - @ _y+=4 - ADD r5, r5, #8 - MOVLT r0, r6 - CMP r0, r7 - MOVLT r0, r7 - CMP r0, r8 - MOVLT r0, r8 - CMP r0, r9 - MOVLT r0, r9 - STMIA r2!, {r6-r9} - SUBS r1, r1, #4 - BGE celt_pitch_xcorr_edsp_process4 -celt_pitch_xcorr_edsp_process2: - ADDS r1, r1, #2 - BLT celt_pitch_xcorr_edsp_process1a - SUBS r12, r3, #4 - @ {r10, r11} = {sum0, sum1} = {0, 0} - MOV r10, #0 - MOV r11, #0 - LDR r8, [r5], #4 - BLE celt_pitch_xcorr_edsp_process2_loop_done - LDR r6, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process2_loop4: - SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) - LDR r7, [r4], #4 - SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) - SUBS r12, r12, #4 @ j-=4 - SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) - LDR r8, [r5], #4 - SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) - LDRGT r6, [r4], #4 - SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2) - SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3) - SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3) - LDRGT r9, [r5], #4 - SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4) - BGT celt_pitch_xcorr_edsp_process2_loop4 -celt_pitch_xcorr_edsp_process2_loop_done: - ADDS r12, r12, #2 - BLE celt_pitch_xcorr_edsp_process2_1 - LDR r6, [r4], #4 - @ Stall - SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) - LDR r9, [r5], #4 - SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) - SUB r12, r12, #2 - SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) - MOV r8, r9 - SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) -celt_pitch_xcorr_edsp_process2_1: - LDRH r6, [r4], #2 - ADDS r12, r12, #1 - @ Stall - SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) - LDRHGT r7, [r4], #2 - SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) - BLE celt_pitch_xcorr_edsp_process2_done - LDRH r9, [r5], #2 - SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1) - SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2) -celt_pitch_xcorr_edsp_process2_done: - @ Restore _x - SUB r4, r4, r3, LSL #1 - @ Restore and advance _y - SUB r5, r5, r3, LSL #1 - @ maxcorr = max(maxcorr, sum0) - CMP r0, r10 - ADD r5, r5, #2 - MOVLT r0, r10 - SUB r1, r1, #2 - @ maxcorr = max(maxcorr, sum1) - CMP r0, r11 - @ xcorr[i] = sum - STR r10, [r2], #4 - MOVLT r0, r11 - STR r11, [r2], #4 -celt_pitch_xcorr_edsp_process1a: - ADDS r1, r1, #1 - BLT celt_pitch_xcorr_edsp_done - SUBS r12, r3, #4 - @ r14 = sum = 0 - MOV r14, #0 - BLT celt_pitch_xcorr_edsp_process1a_loop_done - LDR r6, [r4], #4 - LDR r8, [r5], #4 - LDR r7, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process1a_loop4: - SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) - SUBS r12, r12, #4 @ j-=4 - SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) - LDRGE r6, [r4], #4 - SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) - LDRGE r8, [r5], #4 - SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3) - LDRGE r7, [r4], #4 - LDRGE r9, [r5], #4 - BGE celt_pitch_xcorr_edsp_process1a_loop4 -celt_pitch_xcorr_edsp_process1a_loop_done: - ADDS r12, r12, #2 - LDRGE r6, [r4], #4 - LDRGE r8, [r5], #4 - @ Stall - SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) - SUBGE r12, r12, #2 - SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) - ADDS r12, r12, #1 - LDRHGE r6, [r4], #2 - LDRHGE r8, [r5], #2 - @ Stall - SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) - @ maxcorr = max(maxcorr, sum) - CMP r0, r14 - @ xcorr[i] = sum - STR r14, [r2], #4 - MOVLT r0, r14 -celt_pitch_xcorr_edsp_done: - LDMFD sp!, {r4-r11, pc} - .size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP - - .endif - -@ END: - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s deleted file mode 100644 index f96e0a88bb..0000000000 --- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s +++ /dev/null @@ -1,547 +0,0 @@ -; Copyright (c) 2007-2008 CSIRO -; Copyright (c) 2007-2009 Xiph.Org Foundation -; Copyright (c) 2013 Parrot -; Written by Aurélien Zanelli -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - AREA |.text|, CODE, READONLY - - GET celt/arm/armopts.s - -IF OPUS_ARM_MAY_HAVE_EDSP - EXPORT celt_pitch_xcorr_edsp -ENDIF - -IF OPUS_ARM_MAY_HAVE_NEON - EXPORT celt_pitch_xcorr_neon -ENDIF - -IF OPUS_ARM_MAY_HAVE_NEON - -; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 -xcorr_kernel_neon PROC -xcorr_kernel_neon_start - ; input: - ; r3 = int len - ; r4 = opus_val16 *x - ; r5 = opus_val16 *y - ; q0 = opus_val32 sum[4] - ; output: - ; q0 = opus_val32 sum[4] - ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 - ; internal usage: - ; r12 = int j - ; d3 = y_3|y_2|y_1|y_0 - ; q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 - ; q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 - ; q8 = scratch - ; - ; Load y[0...3] - ; This requires len>0 to always be valid (which we assert in the C code). - VLD1.16 {d5}, [r5]! - SUBS r12, r3, #8 - BLE xcorr_kernel_neon_process4 -; Process 8 samples at a time. -; This loop loads one y value more than we actually need. Therefore we have to -; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid -; reading past the end of the array. -xcorr_kernel_neon_process8 - ; This loop has 19 total instructions (10 cycles to issue, minimum), with - ; - 2 cycles of ARM insrtuctions, - ; - 10 cycles of load/store/byte permute instructions, and - ; - 9 cycles of data processing instructions. - ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the - ; latter two categories, meaning the whole loop should run in 10 cycles per - ; iteration, barring cache misses. - ; - ; Load x[0...7] - VLD1.16 {d6, d7}, [r4]! - ; Unlike VMOV, VAND is a data processsing instruction (and doesn't get - ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. - VAND d3, d5, d5 - SUBS r12, r12, #8 - ; Load y[4...11] - VLD1.16 {d4, d5}, [r5]! - VMLAL.S16 q0, d3, d6[0] - VEXT.16 d16, d3, d4, #1 - VMLAL.S16 q0, d4, d7[0] - VEXT.16 d17, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d3, d4, #2 - VMLAL.S16 q0, d17, d7[1] - VEXT.16 d17, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d3, d4, #3 - VMLAL.S16 q0, d17, d7[2] - VEXT.16 d17, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] - VMLAL.S16 q0, d17, d7[3] - BGT xcorr_kernel_neon_process8 -; Process 4 samples here if we have > 4 left (still reading one extra y value). -xcorr_kernel_neon_process4 - ADDS r12, r12, #4 - BLE xcorr_kernel_neon_process2 - ; Load x[0...3] - VLD1.16 d6, [r4]! - ; Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #4 - ; Load y[4...7] - VLD1.16 d5, [r5]! - VMLAL.S16 q0, d4, d6[0] - VEXT.16 d16, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] -; Process 2 samples here if we have > 2 left (still reading one extra y value). -xcorr_kernel_neon_process2 - ADDS r12, r12, #2 - BLE xcorr_kernel_neon_process1 - ; Load x[0...1] - VLD2.16 {d6[],d7[]}, [r4]! - ; Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #2 - ; Load y[4...5] - VLD1.32 {d5[]}, [r5]! - VMLAL.S16 q0, d4, d6 - VEXT.16 d16, d4, d5, #1 - ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI - ; instead of VEXT, since it's a data-processing instruction. - VSRI.64 d5, d4, #32 - VMLAL.S16 q0, d16, d7 -; Process 1 sample using the extra y value we loaded above. -xcorr_kernel_neon_process1 - ; Load next *x - VLD1.16 {d6[]}, [r4]! - ADDS r12, r12, #1 - ; y[0...3] are left in d5 from prior iteration(s) (if any) - VMLAL.S16 q0, d5, d6 - MOVLE pc, lr -; Now process 1 last sample, not reading ahead. - ; Load last *y - VLD1.16 {d4[]}, [r5]! - VSRI.64 d4, d5, #16 - ; Load last *x - VLD1.16 {d6[]}, [r4]! - VMLAL.S16 q0, d4, d6 - MOV pc, lr - ENDP - -; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, -; opus_val32 *xcorr, int len, int max_pitch) -celt_pitch_xcorr_neon PROC - ; input: - ; r0 = opus_val16 *_x - ; r1 = opus_val16 *_y - ; r2 = opus_val32 *xcorr - ; r3 = int len - ; output: - ; r0 = int maxcorr - ; internal usage: - ; r4 = opus_val16 *x (for xcorr_kernel_neon()) - ; r5 = opus_val16 *y (for xcorr_kernel_neon()) - ; r6 = int max_pitch - ; r12 = int j - ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) - STMFD sp!, {r4-r6, lr} - LDR r6, [sp, #16] - VMOV.S32 q15, #1 - ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - SUBS r6, r6, #4 - BLT celt_pitch_xcorr_neon_process4_done -celt_pitch_xcorr_neon_process4 - ; xcorr_kernel_neon parameters: - ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} - MOV r4, r0 - MOV r5, r1 - VEOR q0, q0, q0 - ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. - ; So we don't save/restore any other registers. - BL xcorr_kernel_neon_start - SUBS r6, r6, #4 - VST1.32 {q0}, [r2]! - ; _y += 4 - ADD r1, r1, #8 - VMAX.S32 q15, q15, q0 - ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - BGE celt_pitch_xcorr_neon_process4 -; We have less than 4 sums left to compute. -celt_pitch_xcorr_neon_process4_done - ADDS r6, r6, #4 - ; Reduce maxcorr to a single value - VMAX.S32 d30, d30, d31 - VPMAX.S32 d30, d30, d30 - ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done - BLE celt_pitch_xcorr_neon_done -; Now compute each remaining sum one at a time. -celt_pitch_xcorr_neon_process_remaining - MOV r4, r0 - MOV r5, r1 - VMOV.I32 q0, #0 - SUBS r12, r3, #8 - BLT celt_pitch_xcorr_neon_process_remaining4 -; Sum terms 8 at a time. -celt_pitch_xcorr_neon_process_remaining_loop8 - ; Load x[0...7] - VLD1.16 {q1}, [r4]! - ; Load y[0...7] - VLD1.16 {q2}, [r5]! - SUBS r12, r12, #8 - VMLAL.S16 q0, d4, d2 - VMLAL.S16 q0, d5, d3 - BGE celt_pitch_xcorr_neon_process_remaining_loop8 -; Sum terms 4 at a time. -celt_pitch_xcorr_neon_process_remaining4 - ADDS r12, r12, #4 - BLT celt_pitch_xcorr_neon_process_remaining4_done - ; Load x[0...3] - VLD1.16 {d2}, [r4]! - ; Load y[0...3] - VLD1.16 {d3}, [r5]! - SUB r12, r12, #4 - VMLAL.S16 q0, d3, d2 -celt_pitch_xcorr_neon_process_remaining4_done - ; Reduce the sum to a single value. - VADD.S32 d0, d0, d1 - VPADDL.S32 d0, d0 - ADDS r12, r12, #4 - BLE celt_pitch_xcorr_neon_process_remaining_loop_done -; Sum terms 1 at a time. -celt_pitch_xcorr_neon_process_remaining_loop1 - VLD1.16 {d2[]}, [r4]! - VLD1.16 {d3[]}, [r5]! - SUBS r12, r12, #1 - VMLAL.S16 q0, d2, d3 - BGT celt_pitch_xcorr_neon_process_remaining_loop1 -celt_pitch_xcorr_neon_process_remaining_loop_done - VST1.32 {d0[0]}, [r2]! - VMAX.S32 d30, d30, d0 - SUBS r6, r6, #1 - ; _y++ - ADD r1, r1, #2 - ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining - BGT celt_pitch_xcorr_neon_process_remaining -celt_pitch_xcorr_neon_done - VMOV.32 r0, d30[0] - LDMFD sp!, {r4-r6, pc} - ENDP - -ENDIF - -IF OPUS_ARM_MAY_HAVE_EDSP - -; This will get used on ARMv7 devices without NEON, so it has been optimized -; to take advantage of dual-issuing where possible. -xcorr_kernel_edsp PROC -xcorr_kernel_edsp_start - ; input: - ; r3 = int len - ; r4 = opus_val16 *_x (must be 32-bit aligned) - ; r5 = opus_val16 *_y (must be 32-bit aligned) - ; r6...r9 = opus_val32 sum[4] - ; output: - ; r6...r9 = opus_val32 sum[4] - ; preserved: r0-r5 - ; internal usage - ; r2 = int j - ; r12,r14 = opus_val16 x[4] - ; r10,r11 = opus_val16 y[4] - STMFD sp!, {r2,r4,r5,lr} - LDR r10, [r5], #4 ; Load y[0...1] - SUBS r2, r3, #4 ; j = len-4 - LDR r11, [r5], #4 ; Load y[2...3] - BLE xcorr_kernel_edsp_process4_done - LDR r12, [r4], #4 ; Load x[0...1] - ; Stall -xcorr_kernel_edsp_process4 - ; The multiplies must issue from pipeline 0, and can't dual-issue with each - ; other. Every other instruction here dual-issues with a multiply, and is - ; thus "free". There should be no stalls in the body of the loop. - SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_0,y_0) - LDR r14, [r4], #4 ; Load x[2...3] - SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x_0,y_1) - SUBS r2, r2, #4 ; j-=4 - SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_0,y_2) - SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x_0,y_3) - SMLATT r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_1,y_1) - LDR r10, [r5], #4 ; Load y[4...5] - SMLATB r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],x_1,y_2) - SMLATT r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_1,y_3) - SMLATB r9, r12, r10, r9 ; sum[3] = MAC16_16(sum[3],x_1,y_4) - LDRGT r12, [r4], #4 ; Load x[0...1] - SMLABB r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_2,y_2) - SMLABT r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x_2,y_3) - SMLABB r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_2,y_4) - SMLABT r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x_2,y_5) - SMLATT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_3,y_3) - LDR r11, [r5], #4 ; Load y[6...7] - SMLATB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],x_3,y_4) - SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5) - SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6) - BGT xcorr_kernel_edsp_process4 -xcorr_kernel_edsp_process4_done - ADDS r2, r2, #4 - BLE xcorr_kernel_edsp_done - LDRH r12, [r4], #2 ; r12 = *x++ - SUBS r2, r2, #1 ; j-- - ; Stall - SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) - LDRHGT r14, [r4], #2 ; r14 = *x++ - SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) - SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) - SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1) - SUBS r2, r2, #1 ; j-- - SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) - LDRH r10, [r5], #2 ; r10 = y_4 = *y++ - SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) - LDRHGT r12, [r4], #2 ; r12 = *x++ - SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) - BLE xcorr_kernel_edsp_done - SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) - CMP r2, #1 ; j-- - SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) - LDRH r2, [r5], #2 ; r2 = y_5 = *y++ - SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) - LDRHGT r14, [r4] ; r14 = *x - SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) - LDRH r11, [r5] ; r11 = y_6 = *y - SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4) - SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5) - SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6) -xcorr_kernel_edsp_done - LDMFD sp!, {r2,r4,r5,pc} - ENDP - -celt_pitch_xcorr_edsp PROC - ; input: - ; r0 = opus_val16 *_x (must be 32-bit aligned) - ; r1 = opus_val16 *_y (only needs to be 16-bit aligned) - ; r2 = opus_val32 *xcorr - ; r3 = int len - ; output: - ; r0 = maxcorr - ; internal usage - ; r4 = opus_val16 *x - ; r5 = opus_val16 *y - ; r6 = opus_val32 sum0 - ; r7 = opus_val32 sum1 - ; r8 = opus_val32 sum2 - ; r9 = opus_val32 sum3 - ; r1 = int max_pitch - ; r12 = int j - STMFD sp!, {r4-r11, lr} - MOV r5, r1 - LDR r1, [sp, #36] - MOV r4, r0 - TST r5, #3 - ; maxcorr = 1 - MOV r0, #1 - BEQ celt_pitch_xcorr_edsp_process1u_done -; Compute one sum at the start to make y 32-bit aligned. - SUBS r12, r3, #4 - ; r14 = sum = 0 - MOV r14, #0 - LDRH r8, [r5], #2 - BLE celt_pitch_xcorr_edsp_process1u_loop4_done - LDR r6, [r4], #4 - MOV r8, r8, LSL #16 -celt_pitch_xcorr_edsp_process1u_loop4 - LDR r9, [r5], #4 - SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - LDR r7, [r4], #4 - SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1) - LDR r8, [r5], #4 - SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) - SUBS r12, r12, #4 ; j-=4 - SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) - LDRGT r6, [r4], #4 - BGT celt_pitch_xcorr_edsp_process1u_loop4 - MOV r8, r8, LSR #16 -celt_pitch_xcorr_edsp_process1u_loop4_done - ADDS r12, r12, #4 -celt_pitch_xcorr_edsp_process1u_loop1 - LDRHGE r6, [r4], #2 - ; Stall - SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) - SUBSGE r12, r12, #1 - LDRHGT r8, [r5], #2 - BGT celt_pitch_xcorr_edsp_process1u_loop1 - ; Restore _x - SUB r4, r4, r3, LSL #1 - ; Restore and advance _y - SUB r5, r5, r3, LSL #1 - ; maxcorr = max(maxcorr, sum) - CMP r0, r14 - ADD r5, r5, #2 - MOVLT r0, r14 - SUBS r1, r1, #1 - ; xcorr[i] = sum - STR r14, [r2], #4 - BLE celt_pitch_xcorr_edsp_done -celt_pitch_xcorr_edsp_process1u_done - ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 - SUBS r1, r1, #4 - BLT celt_pitch_xcorr_edsp_process2 -celt_pitch_xcorr_edsp_process4 - ; xcorr_kernel_edsp parameters: - ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} - MOV r6, #0 - MOV r7, #0 - MOV r8, #0 - MOV r9, #0 - BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) - ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) - CMP r0, r6 - ; _y+=4 - ADD r5, r5, #8 - MOVLT r0, r6 - CMP r0, r7 - MOVLT r0, r7 - CMP r0, r8 - MOVLT r0, r8 - CMP r0, r9 - MOVLT r0, r9 - STMIA r2!, {r6-r9} - SUBS r1, r1, #4 - BGE celt_pitch_xcorr_edsp_process4 -celt_pitch_xcorr_edsp_process2 - ADDS r1, r1, #2 - BLT celt_pitch_xcorr_edsp_process1a - SUBS r12, r3, #4 - ; {r10, r11} = {sum0, sum1} = {0, 0} - MOV r10, #0 - MOV r11, #0 - LDR r8, [r5], #4 - BLE celt_pitch_xcorr_edsp_process2_loop_done - LDR r6, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process2_loop4 - SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDR r7, [r4], #4 - SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) - SUBS r12, r12, #4 ; j-=4 - SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) - LDR r8, [r5], #4 - SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) - LDRGT r6, [r4], #4 - SMLABB r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_2, y_2) - SMLABT r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_2, y_3) - SMLATT r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_3, y_3) - LDRGT r9, [r5], #4 - SMLATB r11, r7, r8, r11 ; sum1 = MAC16_16(sum1, x_3, y_4) - BGT celt_pitch_xcorr_edsp_process2_loop4 -celt_pitch_xcorr_edsp_process2_loop_done - ADDS r12, r12, #2 - BLE celt_pitch_xcorr_edsp_process2_1 - LDR r6, [r4], #4 - ; Stall - SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDR r9, [r5], #4 - SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) - SUB r12, r12, #2 - SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) - MOV r8, r9 - SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) -celt_pitch_xcorr_edsp_process2_1 - LDRH r6, [r4], #2 - ADDS r12, r12, #1 - ; Stall - SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDRHGT r7, [r4], #2 - SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) - BLE celt_pitch_xcorr_edsp_process2_done - LDRH r9, [r5], #2 - SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1) - SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2) -celt_pitch_xcorr_edsp_process2_done - ; Restore _x - SUB r4, r4, r3, LSL #1 - ; Restore and advance _y - SUB r5, r5, r3, LSL #1 - ; maxcorr = max(maxcorr, sum0) - CMP r0, r10 - ADD r5, r5, #2 - MOVLT r0, r10 - SUB r1, r1, #2 - ; maxcorr = max(maxcorr, sum1) - CMP r0, r11 - ; xcorr[i] = sum - STR r10, [r2], #4 - MOVLT r0, r11 - STR r11, [r2], #4 -celt_pitch_xcorr_edsp_process1a - ADDS r1, r1, #1 - BLT celt_pitch_xcorr_edsp_done - SUBS r12, r3, #4 - ; r14 = sum = 0 - MOV r14, #0 - BLT celt_pitch_xcorr_edsp_process1a_loop_done - LDR r6, [r4], #4 - LDR r8, [r5], #4 - LDR r7, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process1a_loop4 - SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - SUBS r12, r12, #4 ; j-=4 - SMLATT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) - LDRGE r6, [r4], #4 - SMLABB r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) - LDRGE r8, [r5], #4 - SMLATT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_3, y_3) - LDRGE r7, [r4], #4 - LDRGE r9, [r5], #4 - BGE celt_pitch_xcorr_edsp_process1a_loop4 -celt_pitch_xcorr_edsp_process1a_loop_done - ADDS r12, r12, #2 - LDRGE r6, [r4], #4 - LDRGE r8, [r5], #4 - ; Stall - SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - SUBGE r12, r12, #2 - SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) - ADDS r12, r12, #1 - LDRHGE r6, [r4], #2 - LDRHGE r8, [r5], #2 - ; Stall - SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) - ; maxcorr = max(maxcorr, sum) - CMP r0, r14 - ; xcorr[i] = sum - STR r14, [r2], #4 - MOVLT r0, r14 -celt_pitch_xcorr_edsp_done - LDMFD sp!, {r4-r11, pc} - ENDP - -ENDIF - -END diff --git a/thirdparty/opus/celt/arm/fft_arm.h b/thirdparty/opus/celt/arm/fft_arm.h deleted file mode 100644 index 0cb55d8e22..0000000000 --- a/thirdparty/opus/celt/arm/fft_arm.h +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file fft_arm.h - @brief ARM Neon Intrinsic optimizations for fft using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#if !defined(FFT_ARM_H) -#define FFT_ARM_H - -#include "config.h" -#include "kiss_fft.h" - -#if defined(HAVE_ARM_NE10) - -int opus_fft_alloc_arm_neon(kiss_fft_state *st); -void opus_fft_free_arm_neon(kiss_fft_state *st); - -void opus_fft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout); - -void opus_ifft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout); - -#if !defined(OPUS_HAVE_RTCD) -#define OVERRIDE_OPUS_FFT (1) - -#define opus_fft_alloc_arch(_st, arch) \ - ((void)(arch), opus_fft_alloc_arm_neon(_st)) - -#define opus_fft_free_arch(_st, arch) \ - ((void)(arch), opus_fft_free_arm_neon(_st)) - -#define opus_fft(_st, _fin, _fout, arch) \ - ((void)(arch), opus_fft_neon(_st, _fin, _fout)) - -#define opus_ifft(_st, _fin, _fout, arch) \ - ((void)(arch), opus_ifft_neon(_st, _fin, _fout)) - -#endif /* OPUS_HAVE_RTCD */ - -#endif /* HAVE_ARM_NE10 */ - -#endif diff --git a/thirdparty/opus/celt/arm/fixed_arm64.h b/thirdparty/opus/celt/arm/fixed_arm64.h deleted file mode 100644 index c6fbd3db2c..0000000000 --- a/thirdparty/opus/celt/arm/fixed_arm64.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (C) 2015 Vidyo */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_ARM64_H -#define FIXED_ARM64_H - -#include - -#undef SIG2WORD16 -#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT))) - -#endif diff --git a/thirdparty/opus/celt/arm/fixed_armv4.h b/thirdparty/opus/celt/arm/fixed_armv4.h deleted file mode 100644 index efb3b1896a..0000000000 --- a/thirdparty/opus/celt/arm/fixed_armv4.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (C) 2013 Xiph.Org Foundation and contributors */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_ARMv4_H -#define FIXED_ARMv4_H - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q16 -static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#MULT16_32_Q16\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b),"r"(a<<16) - ); - return rd_hi; -} -#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b)) - - -/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q15 -static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#MULT16_32_Q15\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b), "r"(a<<16) - ); - /*We intentionally don't OR in the high bit of rd_lo for speed.*/ - return rd_hi<<1; -} -#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b)) - - -/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. - b must fit in 31 bits. - Result fits in 32 bits. */ -#undef MAC16_32_Q15 -#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) - -/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. - Result fits in 32 bits. */ -#undef MAC16_32_Q16 -#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) - -/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ -#undef MULT32_32_Q31 -#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31) - -#endif diff --git a/thirdparty/opus/celt/arm/fixed_armv5e.h b/thirdparty/opus/celt/arm/fixed_armv5e.h deleted file mode 100644 index 36a6321101..0000000000 --- a/thirdparty/opus/celt/arm/fixed_armv5e.h +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (C) 2007-2009 Xiph.Org Foundation - Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2008 CSIRO - Copyright (C) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_ARMv5E_H -#define FIXED_ARMv5E_H - -#include "fixed_armv4.h" - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q16 -static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b) -{ - int res; - __asm__( - "#MULT16_32_Q16\n\t" - "smulwb %0, %1, %2\n\t" - : "=r"(res) - : "r"(b),"r"(a) - ); - return res; -} -#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b)) - - -/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q15 -static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) -{ - int res; - __asm__( - "#MULT16_32_Q15\n\t" - "smulwb %0, %1, %2\n\t" - : "=r"(res) - : "r"(b), "r"(a) - ); - return res<<1; -} -#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) - - -/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. - b must fit in 31 bits. - Result fits in 32 bits. */ -#undef MAC16_32_Q15 -static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, - opus_val32 b) -{ - int res; - __asm__( - "#MAC16_32_Q15\n\t" - "smlawb %0, %1, %2, %3;\n" - : "=r"(res) - : "r"(b<<1), "r"(a), "r"(c) - ); - return res; -} -#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) - -/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. - Result fits in 32 bits. */ -#undef MAC16_32_Q16 -static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, - opus_val32 b) -{ - int res; - __asm__( - "#MAC16_32_Q16\n\t" - "smlawb %0, %1, %2, %3;\n" - : "=r"(res) - : "r"(b), "r"(a), "r"(c) - ); - return res; -} -#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) - -/** 16x16 multiply-add where the result fits in 32 bits */ -#undef MAC16_16 -static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, - opus_val16 b) -{ - int res; - __asm__( - "#MAC16_16\n\t" - "smlabb %0, %1, %2, %3;\n" - : "=r"(res) - : "r"(a), "r"(b), "r"(c) - ); - return res; -} -#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b)) - -/** 16x16 multiplication where the result fits in 32 bits */ -#undef MULT16_16 -static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) -{ - int res; - __asm__( - "#MULT16_16\n\t" - "smulbb %0, %1, %2;\n" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) - -#ifdef OPUS_ARM_INLINE_MEDIA - -#undef SIG2WORD16 -static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) -{ - celt_sig res; - __asm__( - "#SIG2WORD16\n\t" - "ssat %0, #16, %1, ASR #12\n\t" - : "=r"(res) - : "r"(x+2048) - ); - return EXTRACT16(res); -} -#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) - -#endif /* OPUS_ARM_INLINE_MEDIA */ - -#endif diff --git a/thirdparty/opus/celt/arm/kiss_fft_armv4.h b/thirdparty/opus/celt/arm/kiss_fft_armv4.h deleted file mode 100644 index e4faad6f2b..0000000000 --- a/thirdparty/opus/celt/arm/kiss_fft_armv4.h +++ /dev/null @@ -1,121 +0,0 @@ -/*Copyright (c) 2013, Xiph.Org Foundation and contributors. - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_ARMv4_H -#define KISS_FFT_ARMv4_H - -#if !defined(KISS_FFT_GUTS_H) -#error "This file should only be included from _kiss_fft_guts.h" -#endif - -#ifdef FIXED_POINT - -#undef C_MUL -#define C_MUL(m,a,b) \ - do{ \ - int br__; \ - int bi__; \ - int tt__; \ - __asm__ __volatile__( \ - "#C_MUL\n\t" \ - "ldrsh %[br], [%[bp], #0]\n\t" \ - "ldm %[ap], {r0,r1}\n\t" \ - "ldrsh %[bi], [%[bp], #2]\n\t" \ - "smull %[tt], %[mi], r1, %[br]\n\t" \ - "smlal %[tt], %[mi], r0, %[bi]\n\t" \ - "rsb %[bi], %[bi], #0\n\t" \ - "smull %[br], %[mr], r0, %[br]\n\t" \ - "mov %[tt], %[tt], lsr #15\n\t" \ - "smlal %[br], %[mr], r1, %[bi]\n\t" \ - "orr %[mi], %[tt], %[mi], lsl #17\n\t" \ - "mov %[br], %[br], lsr #15\n\t" \ - "orr %[mr], %[br], %[mr], lsl #17\n\t" \ - : [mr]"=r"((m).r), [mi]"=r"((m).i), \ - [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ - : [ap]"r"(&(a)), [bp]"r"(&(b)) \ - : "r0", "r1" \ - ); \ - } \ - while(0) - -#undef C_MUL4 -#define C_MUL4(m,a,b) \ - do{ \ - int br__; \ - int bi__; \ - int tt__; \ - __asm__ __volatile__( \ - "#C_MUL4\n\t" \ - "ldrsh %[br], [%[bp], #0]\n\t" \ - "ldm %[ap], {r0,r1}\n\t" \ - "ldrsh %[bi], [%[bp], #2]\n\t" \ - "smull %[tt], %[mi], r1, %[br]\n\t" \ - "smlal %[tt], %[mi], r0, %[bi]\n\t" \ - "rsb %[bi], %[bi], #0\n\t" \ - "smull %[br], %[mr], r0, %[br]\n\t" \ - "mov %[tt], %[tt], lsr #17\n\t" \ - "smlal %[br], %[mr], r1, %[bi]\n\t" \ - "orr %[mi], %[tt], %[mi], lsl #15\n\t" \ - "mov %[br], %[br], lsr #17\n\t" \ - "orr %[mr], %[br], %[mr], lsl #15\n\t" \ - : [mr]"=r"((m).r), [mi]"=r"((m).i), \ - [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ - : [ap]"r"(&(a)), [bp]"r"(&(b)) \ - : "r0", "r1" \ - ); \ - } \ - while(0) - -#undef C_MULC -#define C_MULC(m,a,b) \ - do{ \ - int br__; \ - int bi__; \ - int tt__; \ - __asm__ __volatile__( \ - "#C_MULC\n\t" \ - "ldrsh %[br], [%[bp], #0]\n\t" \ - "ldm %[ap], {r0,r1}\n\t" \ - "ldrsh %[bi], [%[bp], #2]\n\t" \ - "smull %[tt], %[mr], r0, %[br]\n\t" \ - "smlal %[tt], %[mr], r1, %[bi]\n\t" \ - "rsb %[bi], %[bi], #0\n\t" \ - "smull %[br], %[mi], r1, %[br]\n\t" \ - "mov %[tt], %[tt], lsr #15\n\t" \ - "smlal %[br], %[mi], r0, %[bi]\n\t" \ - "orr %[mr], %[tt], %[mr], lsl #17\n\t" \ - "mov %[br], %[br], lsr #15\n\t" \ - "orr %[mi], %[br], %[mi], lsl #17\n\t" \ - : [mr]"=r"((m).r), [mi]"=r"((m).i), \ - [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ - : [ap]"r"(&(a)), [bp]"r"(&(b)) \ - : "r0", "r1" \ - ); \ - } \ - while(0) - -#endif /* FIXED_POINT */ - -#endif /* KISS_FFT_ARMv4_H */ diff --git a/thirdparty/opus/celt/arm/kiss_fft_armv5e.h b/thirdparty/opus/celt/arm/kiss_fft_armv5e.h deleted file mode 100644 index 9eca183d77..0000000000 --- a/thirdparty/opus/celt/arm/kiss_fft_armv5e.h +++ /dev/null @@ -1,118 +0,0 @@ -/*Copyright (c) 2013, Xiph.Org Foundation and contributors. - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_ARMv5E_H -#define KISS_FFT_ARMv5E_H - -#if !defined(KISS_FFT_GUTS_H) -#error "This file should only be included from _kiss_fft_guts.h" -#endif - -#ifdef FIXED_POINT - -#if defined(__thumb__)||defined(__thumb2__) -#define LDRD_CONS "Q" -#else -#define LDRD_CONS "Uq" -#endif - -#undef C_MUL -#define C_MUL(m,a,b) \ - do{ \ - int mr1__; \ - int mr2__; \ - int mi__; \ - long long aval__; \ - int bval__; \ - __asm__( \ - "#C_MUL\n\t" \ - "ldrd %[aval], %H[aval], %[ap]\n\t" \ - "ldr %[bval], %[bp]\n\t" \ - "smulwb %[mi], %H[aval], %[bval]\n\t" \ - "smulwb %[mr1], %[aval], %[bval]\n\t" \ - "smulwt %[mr2], %H[aval], %[bval]\n\t" \ - "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ - : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ - [aval]"=&r"(aval__), [bval]"=r"(bval__) \ - : [ap]LDRD_CONS(a), [bp]"m"(b) \ - ); \ - (m).r = SHL32(SUB32(mr1__, mr2__), 1); \ - (m).i = SHL32(mi__, 1); \ - } \ - while(0) - -#undef C_MUL4 -#define C_MUL4(m,a,b) \ - do{ \ - int mr1__; \ - int mr2__; \ - int mi__; \ - long long aval__; \ - int bval__; \ - __asm__( \ - "#C_MUL4\n\t" \ - "ldrd %[aval], %H[aval], %[ap]\n\t" \ - "ldr %[bval], %[bp]\n\t" \ - "smulwb %[mi], %H[aval], %[bval]\n\t" \ - "smulwb %[mr1], %[aval], %[bval]\n\t" \ - "smulwt %[mr2], %H[aval], %[bval]\n\t" \ - "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ - : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ - [aval]"=&r"(aval__), [bval]"=r"(bval__) \ - : [ap]LDRD_CONS(a), [bp]"m"(b) \ - ); \ - (m).r = SHR32(SUB32(mr1__, mr2__), 1); \ - (m).i = SHR32(mi__, 1); \ - } \ - while(0) - -#undef C_MULC -#define C_MULC(m,a,b) \ - do{ \ - int mr__; \ - int mi1__; \ - int mi2__; \ - long long aval__; \ - int bval__; \ - __asm__( \ - "#C_MULC\n\t" \ - "ldrd %[aval], %H[aval], %[ap]\n\t" \ - "ldr %[bval], %[bp]\n\t" \ - "smulwb %[mr], %[aval], %[bval]\n\t" \ - "smulwb %[mi1], %H[aval], %[bval]\n\t" \ - "smulwt %[mi2], %[aval], %[bval]\n\t" \ - "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \ - : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \ - [aval]"=&r"(aval__), [bval]"=r"(bval__) \ - : [ap]LDRD_CONS(a), [bp]"m"(b) \ - ); \ - (m).r = SHL32(mr__, 1); \ - (m).i = SHL32(SUB32(mi1__, mi2__), 1); \ - } \ - while(0) - -#endif /* FIXED_POINT */ - -#endif /* KISS_FFT_GUTS_H */ diff --git a/thirdparty/opus/celt/arm/mdct_arm.h b/thirdparty/opus/celt/arm/mdct_arm.h deleted file mode 100644 index 49cbb44576..0000000000 --- a/thirdparty/opus/celt/arm/mdct_arm.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file arm_mdct.h - @brief ARM Neon Intrinsic optimizations for mdct using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(MDCT_ARM_H) -#define MDCT_ARM_H - -#include "config.h" -#include "mdct.h" - -#if defined(HAVE_ARM_NE10) -/** Compute a forward MDCT and scale by 4/N, trashes the input array */ -void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, - int shift, int stride, int arch); - -void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, - int shift, int stride, int arch); - -#if !defined(OPUS_HAVE_RTCD) -#define OVERRIDE_OPUS_MDCT (1) -#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ - clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) -#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ - clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) -#endif /* OPUS_HAVE_RTCD */ -#endif /* HAVE_ARM_NE10 */ - -#endif diff --git a/thirdparty/opus/celt/arm/pitch_arm.h b/thirdparty/opus/celt/arm/pitch_arm.h deleted file mode 100644 index 14331169ee..0000000000 --- a/thirdparty/opus/celt/arm/pitch_arm.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(PITCH_ARM_H) -# define PITCH_ARM_H - -# include "armcpu.h" - -# if defined(FIXED_POINT) - -# if defined(OPUS_ARM_MAY_HAVE_NEON) -opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); -# endif - -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) -# define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr) -# endif - -# if defined(OPUS_ARM_MAY_HAVE_EDSP) -opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); -# endif - -# if defined(OPUS_HAVE_RTCD) && \ - ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ - (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ - (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) -extern opus_val32 -(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int); -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch)) - -# elif defined(OPUS_ARM_PRESUME_EDSP) || \ - defined(OPUS_ARM_PRESUME_MEDIA) || \ - defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) - -# endif - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void xcorr_kernel_neon_fixed( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); -# endif - -# if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) - -extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); - -# define OVERRIDE_XCORR_KERNEL (1) -# define xcorr_kernel(x, y, sum, len, arch) \ - ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) - -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_XCORR_KERNEL (1) -# define xcorr_kernel(x, y, sum, len, arch) \ - ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len)) - -# endif - -#else /* Start !FIXED_POINT */ -/* Float case */ -#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); -#endif - -# if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void -(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int); - -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch)) - -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) - -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) - -# endif - -#endif /* end !FIXED_POINT */ - -#endif diff --git a/thirdparty/opus/celt/bands.c b/thirdparty/opus/celt/bands.c deleted file mode 100644 index 87eaa6c031..0000000000 --- a/thirdparty/opus/celt/bands.c +++ /dev/null @@ -1,1529 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008-2009 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "bands.h" -#include "modes.h" -#include "vq.h" -#include "cwrs.h" -#include "stack_alloc.h" -#include "os_support.h" -#include "mathops.h" -#include "rate.h" -#include "quant_bands.h" -#include "pitch.h" - -int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev) -{ - int i; - for (i=0;iprev && val < thresholds[prev]+hysteresis[prev]) - i=prev; - if (i thresholds[prev-1]-hysteresis[prev-1]) - i=prev; - return i; -} - -opus_uint32 celt_lcg_rand(opus_uint32 seed) -{ - return 1664525 * seed + 1013904223; -} - -/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness - with this approximation is important because it has an impact on the bit allocation */ -static opus_int16 bitexact_cos(opus_int16 x) -{ - opus_int32 tmp; - opus_int16 x2; - tmp = (4096+((opus_int32)(x)*(x)))>>13; - celt_assert(tmp<=32767); - x2 = tmp; - x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))); - celt_assert(x2<=32766); - return 1+x2; -} - -static int bitexact_log2tan(int isin,int icos) -{ - int lc; - int ls; - lc=EC_ILOG(icos); - ls=EC_ILOG(isin); - icos<<=15-lc; - isin<<=15-ls; - return (ls-lc)*(1<<11) - +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932) - -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932); -} - -#ifdef FIXED_POINT -/* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) -{ - int i, c, N; - const opus_int16 *eBands = m->eBands; - N = m->shortMdctSize< 0) - { - int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); - j=eBands[i]<0) - { - do { - sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), - EXTRACT16(SHR32(X[j+c*N],shift))); - } while (++jnbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); - } else { - bandE[i+c*m->nbEBands] = EPSILON; - } - /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ - } - } while (++ceBands; - N = M*m->shortMdctSize; - c=0; do { - i=0; do { - opus_val16 g; - int j,shift; - opus_val16 E; - shift = celt_zlog2(bandE[i+c*m->nbEBands])-13; - E = VSHR32(bandE[i+c*m->nbEBands], shift); - g = EXTRACT16(celt_rcp(SHL32(E,3))); - j=M*eBands[i]; do { - X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g); - } while (++jeBands; - N = m->shortMdctSize<nbEBands] = celt_sqrt(sum); - /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ - } - } while (++ceBands; - N = M*m->shortMdctSize; - c=0; do { - for (i=0;inbEBands]); - for (j=M*eBands[i];jeBands; - N = M*m->shortMdctSize; - bound = M*eBands[end]; - if (downsample!=1) - bound = IMIN(bound, N/downsample); - if (silence) - { - bound = 0; - start = end = 0; - } - f = freq; - x = X+M*eBands[start]; - for (i=0;i>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<eBands[i+1]-m->eBands[i]; - /* depth in 1/8 bits */ - celt_assert(pulses[i]>=0); - depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; - -#ifdef FIXED_POINT - thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); - thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32)); - { - opus_val32 t; - t = N0<>1; - t = SHL32(t, (7-shift)<<1); - sqrt_1 = celt_rsqrt_norm(t); - } -#else - thresh = .5f*celt_exp2(-.125f*depth); - sqrt_1 = celt_rsqrt(N0<nbEBands+i]; - prev2 = prev2logE[c*m->nbEBands+i]; - if (C==1) - { - prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]); - prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]); - } - Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2)); - Ediff = MAX32(0, Ediff); - -#ifdef FIXED_POINT - if (Ediff < 16384) - { - opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1); - r = 2*MIN16(16383,r32); - } else { - r = 0; - } - if (LM==3) - r = MULT16_16_Q14(23170, MIN32(23169, r)); - r = SHR16(MIN16(thresh, r),1); - r = SHR32(MULT16_16_Q15(sqrt_1, r),shift); -#else - /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because - short blocks don't have the same energy as long */ - r = 2.f*celt_exp2(-Ediff); - if (LM==3) - r *= 1.41421356f; - r = MIN16(thresh, r); - r = r*sqrt_1; -#endif - X = X_+c*size+(m->eBands[i]<nbEBands]))-13; -#endif - left = VSHR32(bandE[i],shift); - right = VSHR32(bandE[i+m->nbEBands],shift); - norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right)); - a1 = DIV32_16(SHL32(EXTEND32(left),14),norm); - a2 = DIV32_16(SHL32(EXTEND32(right),14),norm); - for (j=0;j>1; - kr = celt_ilog2(Er)>>1; -#endif - t = VSHR32(El, (kl-7)<<1); - lgain = celt_rsqrt_norm(t); - t = VSHR32(Er, (kr-7)<<1); - rgain = celt_rsqrt_norm(t); - -#ifdef FIXED_POINT - if (kl < 7) - kl = 7; - if (kr < 7) - kr = 7; -#endif - - for (j=0;jeBands; - int decision; - int hf_sum=0; - - celt_assert(end>0); - - N0 = M*m->shortMdctSize; - - if (M*(eBands[end]-eBands[end-1]) <= 8) - return SPREAD_NONE; - c=0; do { - for (i=0;im->nbEBands-4) - hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); - tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); - sum += tmp*256; - nbBands++; - } - } while (++cnbEBands+end)); - *hf_average = (*hf_average+hf_sum)>>1; - hf_sum = *hf_average; - if (*tapset_decision==2) - hf_sum += 4; - else if (*tapset_decision==0) - hf_sum -= 4; - if (hf_sum > 22) - *tapset_decision=2; - else if (hf_sum > 18) - *tapset_decision=1; - else - *tapset_decision=0; - } - /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ - celt_assert(nbBands>0); /* end has to be non-zero */ - celt_assert(sum>=0); - sum = celt_udiv(sum, nbBands); - /* Recursive averaging */ - sum = (sum+*average)>>1; - *average = sum; - /* Hysteresis */ - sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2; - if (sum < 80) - { - decision = SPREAD_AGGRESSIVE; - } else if (sum < 256) - { - decision = SPREAD_NORMAL; - } else if (sum < 384) - { - decision = SPREAD_LIGHT; - } else { - decision = SPREAD_NONE; - } -#ifdef FUZZING - decision = rand()&0x3; - *tapset_decision=rand()%3; -#endif - return decision; -} - -/* Indexing table for converting from natural Hadamard to ordery Hadamard - This is essentially a bit-reversed Gray, on top of which we've added - an inversion of the order because we want the DC at the end rather than - the beginning. The lines are for N=2, 4, 8, 16 */ -static const int ordery_table[] = { - 1, 0, - 3, 0, 2, 1, - 7, 0, 4, 3, 6, 1, 5, 2, - 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5, -}; - -static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) -{ - int i,j; - VARDECL(celt_norm, tmp); - int N; - SAVE_STACK; - N = N0*stride; - ALLOC(tmp, N, celt_norm); - celt_assert(stride>0); - if (hadamard) - { - const int *ordery = ordery_table+stride-2; - for (i=0;i>= 1; - for (i=0;i>1)) { - qn = 1; - } else { - qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES)); - qn = (qn+1)>>1<<1; - } - celt_assert(qn <= 256); - return qn; -} - -struct band_ctx { - int encode; - const CELTMode *m; - int i; - int intensity; - int spread; - int tf_change; - ec_ctx *ec; - opus_int32 remaining_bits; - const celt_ener *bandE; - opus_uint32 seed; - int arch; -}; - -struct split_ctx { - int inv; - int imid; - int iside; - int delta; - int itheta; - int qalloc; -}; - -static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, - celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0, - int LM, - int stereo, int *fill) -{ - int qn; - int itheta=0; - int delta; - int imid, iside; - int qalloc; - int pulse_cap; - int offset; - opus_int32 tell; - int inv=0; - int encode; - const CELTMode *m; - int i; - int intensity; - ec_ctx *ec; - const celt_ener *bandE; - - encode = ctx->encode; - m = ctx->m; - i = ctx->i; - intensity = ctx->intensity; - ec = ctx->ec; - bandE = ctx->bandE; - - /* Decide on the resolution to give to the split parameter theta */ - pulse_cap = m->logN[i]+LM*(1<>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET); - qn = compute_qn(N, *b, offset, pulse_cap, stereo); - if (stereo && i>=intensity) - qn = 1; - if (encode) - { - /* theta is the atan() of the ratio between the (normalized) - side and mid. With just that parameter, we can re-scale both - mid and side because we know that 1) they have unit norm and - 2) they are orthogonal. */ - itheta = stereo_itheta(X, Y, stereo, N, ctx->arch); - } - tell = ec_tell_frac(ec); - if (qn!=1) - { - if (encode) - itheta = (itheta*(opus_int32)qn+8192)>>14; - - /* Entropy coding of the angle. We use a uniform pdf for the - time split, a step for stereo, and a triangular one for the rest. */ - if (stereo && N>2) - { - int p0 = 3; - int x = itheta; - int x0 = qn/2; - int ft = p0*(x0+1) + x0; - /* Use a probability of p0 up to itheta=8192 and then use 1 after */ - if (encode) - { - ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); - } else { - int fs; - fs=ec_decode(ec,ft); - if (fs<(x0+1)*p0) - x=fs/p0; - else - x=x0+1+(fs-(x0+1)*p0); - ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); - itheta = x; - } - } else if (B0>1 || stereo) { - /* Uniform pdf */ - if (encode) - ec_enc_uint(ec, itheta, qn+1); - else - itheta = ec_dec_uint(ec, qn+1); - } else { - int fs=1, ft; - ft = ((qn>>1)+1)*((qn>>1)+1); - if (encode) - { - int fl; - - fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta; - fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 : - ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); - - ec_encode(ec, fl, fl+fs, ft); - } else { - /* Triangular pdf */ - int fl=0; - int fm; - fm = ec_decode(ec, ft); - - if (fm < ((qn>>1)*((qn>>1) + 1)>>1)) - { - itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1; - fs = itheta + 1; - fl = itheta*(itheta + 1)>>1; - } - else - { - itheta = (2*(qn + 1) - - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1; - fs = qn + 1 - itheta; - fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); - } - - ec_dec_update(ec, fl, fl+fs, ft); - } - } - celt_assert(itheta>=0); - itheta = celt_udiv((opus_int32)itheta*16384, qn); - if (encode && stereo) - { - if (itheta==0) - intensity_stereo(m, X, Y, bandE, i, N); - else - stereo_split(X, Y, N); - } - /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. - Let's do that at higher complexity */ - } else if (stereo) { - if (encode) - { - inv = itheta > 8192; - if (inv) - { - int j; - for (j=0;j2<remaining_bits > 2<inv = inv; - sctx->imid = imid; - sctx->iside = iside; - sctx->delta = delta; - sctx->itheta = itheta; - sctx->qalloc = qalloc; -} -static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, - celt_norm *lowband_out) -{ -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !ctx->encode; -#endif - int c; - int stereo; - celt_norm *x = X; - int encode; - ec_ctx *ec; - - encode = ctx->encode; - ec = ctx->ec; - - stereo = Y != NULL; - c=0; do { - int sign=0; - if (ctx->remaining_bits>=1<remaining_bits -= 1<encode; -#endif - celt_norm *Y=NULL; - int encode; - const CELTMode *m; - int i; - int spread; - ec_ctx *ec; - - encode = ctx->encode; - m = ctx->m; - i = ctx->i; - spread = ctx->spread; - ec = ctx->ec; - - /* If we need 1.5 more bit than we can produce, split the band in two. */ - cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; - if (LM != -1 && b > cache[cache[0]]+12 && N>2) - { - int mbits, sbits, delta; - int itheta; - int qalloc; - struct split_ctx sctx; - celt_norm *next_lowband2=NULL; - opus_int32 rebalance; - - N >>= 1; - Y = X+N; - LM -= 1; - if (B==1) - fill = (fill&1)|(fill<<1); - B = (B+1)>>1; - - compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, - LM, 0, &fill); - imid = sctx.imid; - iside = sctx.iside; - delta = sctx.delta; - itheta = sctx.itheta; - qalloc = sctx.qalloc; -#ifdef FIXED_POINT - mid = imid; - side = iside; -#else - mid = (1.f/32768)*imid; - side = (1.f/32768)*iside; -#endif - - /* Give more bits to low-energy MDCTs than they would otherwise deserve */ - if (B0>1 && (itheta&0x3fff)) - { - if (itheta > 8192) - /* Rough approximation for pre-echo masking */ - delta -= delta>>(4-LM); - else - /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */ - delta = IMIN(0, delta + (N<>(5-LM))); - } - mbits = IMAX(0, IMIN(b, (b-delta)/2)); - sbits = b-mbits; - ctx->remaining_bits -= qalloc; - - if (lowband) - next_lowband2 = lowband+N; /* >32-bit split case */ - - rebalance = ctx->remaining_bits; - if (mbits >= sbits) - { - cm = quant_partition(ctx, X, N, mbits, B, - lowband, LM, - MULT16_16_P15(gain,mid), fill); - rebalance = mbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<>B)<<(B0>>1); - } else { - cm = quant_partition(ctx, Y, N, sbits, B, - next_lowband2, LM, - MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); - rebalance = sbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<remaining_bits -= curr_bits; - - /* Ensures we can never bust the budget */ - while (ctx->remaining_bits < 0 && q > 0) - { - ctx->remaining_bits += curr_bits; - q--; - curr_bits = pulses2bits(m, i, LM, q); - ctx->remaining_bits -= curr_bits; - } - - if (q!=0) - { - int K = get_pulses(q); - - /* Finally do the actual quantization */ - if (encode) - { - cm = alg_quant(X, N, K, spread, B, ec -#ifdef RESYNTH - , gain -#endif - ); - } else { - cm = alg_unquant(X, N, K, spread, B, ec, gain); - } - } else { - /* If there's no pulse, fill the band anyway */ - int j; - if (resynth) - { - unsigned cm_mask; - /* B can be as large as 16, so this shift might overflow an int on a - 16-bit platform; use a long to get defined behavior.*/ - cm_mask = (unsigned)(1UL<seed = celt_lcg_rand(ctx->seed); - X[j] = (celt_norm)((opus_int32)ctx->seed>>20); - } - cm = cm_mask; - } else { - /* Folded spectrum */ - for (j=0;jseed = celt_lcg_rand(ctx->seed); - /* About 48 dB below the "normal" folding level */ - tmp = QCONST16(1.0f/256, 10); - tmp = (ctx->seed)&0x8000 ? tmp : -tmp; - X[j] = lowband[j]+tmp; - } - cm = fill; - } - renormalise_vector(X, N, gain, ctx->arch); - } - } - } - } - - return cm; -} - - -/* This function is responsible for encoding and decoding a band for the mono case. */ -static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, - int N, int b, int B, celt_norm *lowband, - int LM, celt_norm *lowband_out, - opus_val16 gain, celt_norm *lowband_scratch, int fill) -{ - int N0=N; - int N_B=N; - int N_B0; - int B0=B; - int time_divide=0; - int recombine=0; - int longBlocks; - unsigned cm=0; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !ctx->encode; -#endif - int k; - int encode; - int tf_change; - - encode = ctx->encode; - tf_change = ctx->tf_change; - - longBlocks = B0==1; - - N_B = celt_udiv(N_B, B); - - /* Special case for one sample */ - if (N==1) - { - return quant_band_n1(ctx, X, NULL, b, lowband_out); - } - - if (tf_change>0) - recombine = tf_change; - /* Band recombining to increase frequency resolution */ - - if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) - { - OPUS_COPY(lowband_scratch, lowband, N); - lowband = lowband_scratch; - } - - for (k=0;k>k, 1<>k, 1<>4]<<2; - } - B>>=recombine; - N_B<<=recombine; - - /* Increasing the time resolution */ - while ((N_B&1) == 0 && tf_change<0) - { - if (encode) - haar1(X, N_B, B); - if (lowband) - haar1(lowband, N_B, B); - fill |= fill<>= 1; - time_divide++; - tf_change++; - } - B0=B; - N_B0 = N_B; - - /* Reorganize the samples in time order instead of frequency order */ - if (B0>1) - { - if (encode) - deinterleave_hadamard(X, N_B>>recombine, B0<>recombine, B0<1) - interleave_hadamard(X, N_B>>recombine, B0<>= 1; - N_B <<= 1; - cm |= cm>>B; - haar1(X, N_B, B); - } - - for (k=0;k>k, 1<encode; -#endif - int mbits, sbits, delta; - int itheta; - int qalloc; - struct split_ctx sctx; - int orig_fill; - int encode; - ec_ctx *ec; - - encode = ctx->encode; - ec = ctx->ec; - - /* Special case for one sample */ - if (N==1) - { - return quant_band_n1(ctx, X, Y, b, lowband_out); - } - - orig_fill = fill; - - compute_theta(ctx, &sctx, X, Y, N, &b, B, B, - LM, 1, &fill); - inv = sctx.inv; - imid = sctx.imid; - iside = sctx.iside; - delta = sctx.delta; - itheta = sctx.itheta; - qalloc = sctx.qalloc; -#ifdef FIXED_POINT - mid = imid; - side = iside; -#else - mid = (1.f/32768)*imid; - side = (1.f/32768)*iside; -#endif - - /* This is a special case for N=2 that only works for stereo and takes - advantage of the fact that mid and side are orthogonal to encode - the side with just one bit. */ - if (N==2) - { - int c; - int sign=0; - celt_norm *x2, *y2; - mbits = b; - sbits = 0; - /* Only need one bit for the side. */ - if (itheta != 0 && itheta != 16384) - sbits = 1< 8192; - ctx->remaining_bits -= qalloc+sbits; - - x2 = c ? Y : X; - y2 = c ? X : Y; - if (sbits) - { - if (encode) - { - /* Here we only need to encode a sign for the side. */ - sign = x2[0]*y2[1] - x2[1]*y2[0] < 0; - ec_enc_bits(ec, sign, 1); - } else { - sign = ec_dec_bits(ec, 1); - } - } - sign = 1-2*sign; - /* We use orig_fill here because we want to fold the side, but if - itheta==16384, we'll have cleared the low bits of fill. */ - cm = quant_band(ctx, x2, N, mbits, B, lowband, - LM, lowband_out, Q15ONE, lowband_scratch, orig_fill); - /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), - and there's no need to worry about mixing with the other channel. */ - y2[0] = -sign*x2[1]; - y2[1] = sign*x2[0]; - if (resynth) - { - celt_norm tmp; - X[0] = MULT16_16_Q15(mid, X[0]); - X[1] = MULT16_16_Q15(mid, X[1]); - Y[0] = MULT16_16_Q15(side, Y[0]); - Y[1] = MULT16_16_Q15(side, Y[1]); - tmp = X[0]; - X[0] = SUB16(tmp,Y[0]); - Y[0] = ADD16(tmp,Y[0]); - tmp = X[1]; - X[1] = SUB16(tmp,Y[1]); - Y[1] = ADD16(tmp,Y[1]); - } - } else { - /* "Normal" split code */ - opus_int32 rebalance; - - mbits = IMAX(0, IMIN(b, (b-delta)/2)); - sbits = b-mbits; - ctx->remaining_bits -= qalloc; - - rebalance = ctx->remaining_bits; - if (mbits >= sbits) - { - /* In stereo mode, we do not apply a scaling to the mid because we need the normalized - mid for folding later. */ - cm = quant_band(ctx, X, N, mbits, B, - lowband, LM, lowband_out, - Q15ONE, lowband_scratch, fill); - rebalance = mbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<>B); - } else { - /* For a stereo split, the high bits of fill are always zero, so no - folding will be done to the side. */ - cm = quant_band(ctx, Y, N, sbits, B, - NULL, LM, NULL, - side, NULL, fill>>B); - rebalance = sbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<arch); - if (inv) - { - int j; - for (j=0;jeBands; - celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2; - VARDECL(celt_norm, _norm); - celt_norm *lowband_scratch; - int B; - int M; - int lowband_offset; - int update_lowband = 1; - int C = Y_ != NULL ? 2 : 1; - int norm_offset; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !encode; -#endif - struct band_ctx ctx; - SAVE_STACK; - - M = 1<nbEBands-1]-norm_offset), celt_norm); - norm = _norm; - norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; - /* We can use the last band as scratch space because we don't need that - scratch space for the last band. */ - lowband_scratch = X_+M*eBands[m->nbEBands-1]; - - lowband_offset = 0; - ctx.bandE = bandE; - ctx.ec = ec; - ctx.encode = encode; - ctx.intensity = intensity; - ctx.m = m; - ctx.seed = *seed; - ctx.spread = spread; - ctx.arch = arch; - for (i=start;i= M*eBands[start] && (update_lowband || lowband_offset==0)) - lowband_offset = i; - - tf_change = tf_res[i]; - ctx.tf_change = tf_change; - if (i>=m->effEBands) - { - X=norm; - if (Y_!=NULL) - Y = norm; - lowband_scratch = NULL; - } - if (i==end-1) - lowband_scratch = NULL; - - /* Get a conservative estimate of the collapse_mask's for the bands we're - going to be folding from. */ - if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0)) - { - int fold_start; - int fold_end; - int fold_i; - /* This ensures we never repeat spectral content within one band */ - effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N); - fold_start = lowband_offset; - while(M*eBands[--fold_start] > effective_lowband+norm_offset); - fold_end = lowband_offset-1; - while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); - x_cm = y_cm = 0; - fold_i = fold_start; do { - x_cm |= collapse_masks[fold_i*C+0]; - y_cm |= collapse_masks[fold_i*C+C-1]; - } while (++fold_i(N< -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include -#include "celt_lpc.h" -#include "vq.h" - -#ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "unknown" -#endif - -#if defined(MIPSr1_ASM) -#include "mips/celt_mipsr1.h" -#endif - - -int resampling_factor(opus_int32 rate) -{ - int ret; - switch (rate) - { - case 48000: - ret = 1; - break; - case 24000: - ret = 2; - break; - case 16000: - ret = 3; - break; - case 12000: - ret = 4; - break; - case 8000: - ret = 6; - break; - default: -#ifndef CUSTOM_MODES - celt_assert(0); -#endif - ret = 0; - break; - } - return ret; -} - -#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C) -/* This version should be faster on ARM */ -#ifdef OPUS_ARM_ASM -#ifndef NON_STATIC_COMB_FILTER_CONST_C -static -#endif -void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12) -{ - opus_val32 x0, x1, x2, x3, x4; - int i; - x4 = SHL32(x[-T-2], 1); - x3 = SHL32(x[-T-1], 1); - x2 = SHL32(x[-T], 1); - x1 = SHL32(x[-T+1], 1); - for (i=0;inbEBands;i++) - { - int N; - N=(m->eBands[i+1]-m->eBands[i])<cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2; - } -} - - - -const char *opus_strerror(int error) -{ - static const char * const error_strings[8] = { - "success", - "invalid argument", - "buffer too small", - "internal error", - "corrupted stream", - "request not implemented", - "invalid state", - "memory allocation failed" - }; - if (error > 0 || error < -7) - return "unknown error"; - else - return error_strings[-error]; -} - -const char *opus_get_version_string(void) -{ - return "libopus " PACKAGE_VERSION - /* Applications may rely on the presence of this substring in the version - string to determine if they have a fixed-point or floating-point build - at runtime. */ -#ifdef FIXED_POINT - "-fixed" -#endif -#ifdef FUZZING - "-fuzzing" -#endif - ; -} diff --git a/thirdparty/opus/celt/celt.h b/thirdparty/opus/celt/celt.h deleted file mode 100644 index d1f7eb690d..0000000000 --- a/thirdparty/opus/celt/celt.h +++ /dev/null @@ -1,229 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/** - @file celt.h - @brief Contains all the functions for encoding and decoding audio - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CELT_H -#define CELT_H - -#include "opus_types.h" -#include "opus_defines.h" -#include "opus_custom.h" -#include "entenc.h" -#include "entdec.h" -#include "arch.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define CELTEncoder OpusCustomEncoder -#define CELTDecoder OpusCustomDecoder -#define CELTMode OpusCustomMode - -typedef struct { - int valid; - float tonality; - float tonality_slope; - float noisiness; - float activity; - float music_prob; - int bandwidth; -}AnalysisInfo; - -#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) - -#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) - -/* Encoder/decoder Requests */ - -/* Expose this option again when variable framesize actually works */ -#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ - - -#define CELT_SET_PREDICTION_REQUEST 10002 -/** Controls the use of interframe prediction. - 0=Independent frames - 1=Short term interframe prediction allowed - 2=Long term prediction allowed - */ -#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x) - -#define CELT_SET_INPUT_CLIPPING_REQUEST 10004 -#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x) - -#define CELT_GET_AND_CLEAR_ERROR_REQUEST 10007 -#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x) - -#define CELT_SET_CHANNELS_REQUEST 10008 -#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x) - - -/* Internal */ -#define CELT_SET_START_BAND_REQUEST 10010 -#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x) - -#define CELT_SET_END_BAND_REQUEST 10012 -#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x) - -#define CELT_GET_MODE_REQUEST 10015 -/** Get the CELTMode used by an encoder or decoder */ -#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x) - -#define CELT_SET_SIGNALLING_REQUEST 10016 -#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) - -#define CELT_SET_TONALITY_REQUEST 10018 -#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x) -#define CELT_SET_TONALITY_SLOPE_REQUEST 10020 -#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) - -#define CELT_SET_ANALYSIS_REQUEST 10022 -#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) - -#define OPUS_SET_LFE_REQUEST 10024 -#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) - -#define OPUS_SET_ENERGY_MASK_REQUEST 10026 -#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) - -/* Encoder stuff */ - -int celt_encoder_get_size(int channels); - -int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc); - -int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, - int arch); - - - -/* Decoder stuff */ - -int celt_decoder_get_size(int channels); - - -int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); - -int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, - int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum); - -#define celt_encoder_ctl opus_custom_encoder_ctl -#define celt_decoder_ctl opus_custom_decoder_ctl - - -#ifdef CUSTOM_MODES -#define OPUS_CUSTOM_NOSTATIC -#else -#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE -#endif - -static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0}; -/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */ -static const unsigned char spread_icdf[4] = {25, 23, 2, 0}; - -static const unsigned char tapset_icdf[3]={2,1,0}; - -#ifdef CUSTOM_MODES -static const unsigned char toOpusTable[20] = { - 0xE0, 0xE8, 0xF0, 0xF8, - 0xC0, 0xC8, 0xD0, 0xD8, - 0xA0, 0xA8, 0xB0, 0xB8, - 0x00, 0x00, 0x00, 0x00, - 0x80, 0x88, 0x90, 0x98, -}; - -static const unsigned char fromOpusTable[16] = { - 0x80, 0x88, 0x90, 0x98, - 0x40, 0x48, 0x50, 0x58, - 0x20, 0x28, 0x30, 0x38, - 0x00, 0x08, 0x10, 0x18 -}; - -static OPUS_INLINE int toOpus(unsigned char c) -{ - int ret=0; - if (c<0xA0) - ret = toOpusTable[c>>3]; - if (ret == 0) - return -1; - else - return ret|(c&0x7); -} - -static OPUS_INLINE int fromOpus(unsigned char c) -{ - if (c<0x80) - return -1; - else - return fromOpusTable[(c>>3)-16] | (c&0x7); -} -#endif /* CUSTOM_MODES */ - -#define COMBFILTER_MAXPERIOD 1024 -#define COMBFILTER_MINPERIOD 15 - -extern const signed char tf_select_table[4][8]; - -int resampling_factor(opus_int32 rate); - -void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, - int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip); - -void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, - opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, - const opus_val16 *window, int overlap, int arch); - -#ifdef NON_STATIC_COMB_FILTER_CONST_C -void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12); -#endif - -#ifndef OVERRIDE_COMB_FILTER_CONST -# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ - ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12)) -#endif - -void init_caps(const CELTMode *m,int *cap,int LM,int C); - -#ifdef RESYNTH -void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem); -void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], - opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, - int LM, int downsample, int silence); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* CELT_H */ diff --git a/thirdparty/opus/celt/celt_decoder.c b/thirdparty/opus/celt/celt_decoder.c deleted file mode 100644 index b978bb34d1..0000000000 --- a/thirdparty/opus/celt/celt_decoder.c +++ /dev/null @@ -1,1248 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_DECODER_C - -#include "cpu_support.h" -#include "os_support.h" -#include "mdct.h" -#include -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include -#include "celt_lpc.h" -#include "vq.h" - -#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) -#define NORM_ALIASING_HACK -#endif -/**********************************************************************/ -/* */ -/* DECODER */ -/* */ -/**********************************************************************/ -#define DECODE_BUFFER_SIZE 2048 - -/** Decoder state - @brief Decoder state - */ -struct OpusCustomDecoder { - const OpusCustomMode *mode; - int overlap; - int channels; - int stream_channels; - - int downsample; - int start, end; - int signalling; - int arch; - - /* Everything beyond this point gets cleared on a reset */ -#define DECODER_RESET_START rng - - opus_uint32 rng; - int error; - int last_pitch_index; - int loss_count; - int skip_plc; - int postfilter_period; - int postfilter_period_old; - opus_val16 postfilter_gain; - opus_val16 postfilter_gain_old; - int postfilter_tapset; - int postfilter_tapset_old; - - celt_sig preemph_memD[2]; - - celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */ - /* opus_val16 lpc[], Size = channels*LPC_ORDER */ - /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */ - /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */ - /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */ - /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */ -}; - -int celt_decoder_get_size(int channels) -{ - const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); - return opus_custom_decoder_get_size(mode, channels); -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels) -{ - int size = sizeof(struct CELTDecoder) - + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig) - + channels*LPC_ORDER*sizeof(opus_val16) - + 4*2*mode->nbEBands*sizeof(opus_val16); - return size; -} - -#ifdef CUSTOM_MODES -CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error) -{ - int ret; - CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels)); - ret = opus_custom_decoder_init(st, mode, channels); - if (ret != OPUS_OK) - { - opus_custom_decoder_destroy(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} -#endif /* CUSTOM_MODES */ - -int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels) -{ - int ret; - ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels); - if (ret != OPUS_OK) - return ret; - st->downsample = resampling_factor(sampling_rate); - if (st->downsample==0) - return OPUS_BAD_ARG; - else - return OPUS_OK; -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels) -{ - if (channels < 0 || channels > 2) - return OPUS_BAD_ARG; - - if (st==NULL) - return OPUS_ALLOC_FAIL; - - OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels)); - - st->mode = mode; - st->overlap = mode->overlap; - st->stream_channels = st->channels = channels; - - st->downsample = 1; - st->start = 0; - st->end = st->mode->effEBands; - st->signalling = 1; - st->arch = opus_select_arch(); - - opus_custom_decoder_ctl(st, OPUS_RESET_STATE); - - return OPUS_OK; -} - -#ifdef CUSTOM_MODES -void opus_custom_decoder_destroy(CELTDecoder *st) -{ - opus_free(st); -} -#endif /* CUSTOM_MODES */ - - -#ifndef RESYNTH -static -#endif -void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, - celt_sig *mem, int accum) -{ - int c; - int Nd; - int apply_downsampling=0; - opus_val16 coef0; - VARDECL(celt_sig, scratch); - SAVE_STACK; -#ifndef FIXED_POINT - (void)accum; - celt_assert(accum==0); -#endif - ALLOC(scratch, N, celt_sig); - coef0 = coef[0]; - Nd = N/downsample; - c=0; do { - int j; - celt_sig * OPUS_RESTRICT x; - opus_val16 * OPUS_RESTRICT y; - celt_sig m = mem[c]; - x =in[c]; - y = pcm+c; -#ifdef CUSTOM_MODES - if (coef[1] != 0) - { - opus_val16 coef1 = coef[1]; - opus_val16 coef3 = coef[3]; - for (j=0;j1) - { - /* Shortcut for the standard (non-custom modes) case */ - for (j=0;joverlap; - nbEBands = mode->nbEBands; - N = mode->shortMdctSize<shortMdctSize; - shift = mode->maxLM; - } else { - B = 1; - NB = mode->shortMdctSize<maxLM-LM; - } - - if (CC==2&&C==1) - { - /* Copying a mono streams to two channels */ - celt_sig *freq2; - denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, - downsample, silence); - /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ - freq2 = out_syn[1]+overlap/2; - OPUS_COPY(freq2, freq, N); - for (b=0;bmdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); - for (b=0;bmdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch); - } else if (CC==1&&C==2) - { - /* Downmixing a stereo stream to mono */ - celt_sig *freq2; - freq2 = out_syn[0]+overlap/2; - denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, - downsample, silence); - /* Use the output buffer as temp array before downmixing. */ - denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, - downsample, silence); - for (i=0;imdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); - } else { - /* Normal case (mono or stereo) */ - c=0; do { - denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, - downsample, silence); - for (b=0;bmdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch); - } while (++cstorage*8; - tell = ec_tell(dec); - logp = isTransient ? 2 : 4; - tf_select_rsv = LM>0 && tell+logp+1<=budget; - budget -= tf_select_rsv; - tf_changed = curr = 0; - for (i=start;i>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - RESTORE_STACK; - return pitch_index; -} - -static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) -{ - int c; - int i; - const int C = st->channels; - celt_sig *decode_mem[2]; - celt_sig *out_syn[2]; - opus_val16 *lpc; - opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; - const OpusCustomMode *mode; - int nbEBands; - int overlap; - int start; - int loss_count; - int noise_based; - const opus_int16 *eBands; - SAVE_STACK; - - mode = st->mode; - nbEBands = mode->nbEBands; - overlap = mode->overlap; - eBands = mode->eBands; - - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; - } while (++c_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C); - oldBandE = lpc+C*LPC_ORDER; - oldLogE = oldBandE + 2*nbEBands; - oldLogE2 = oldLogE + 2*nbEBands; - backgroundLogE = oldLogE2 + 2*nbEBands; - - loss_count = st->loss_count; - start = st->start; - noise_based = loss_count >= 5 || start != 0 || st->skip_plc; - if (noise_based) - { - /* Noise-based PLC/CNG */ -#ifdef NORM_ALIASING_HACK - celt_norm *X; -#else - VARDECL(celt_norm, X); -#endif - opus_uint32 seed; - int end; - int effEnd; - opus_val16 decay; - end = st->end; - effEnd = IMAX(start, IMIN(end, mode->effEBands)); - -#ifdef NORM_ALIASING_HACK - /* This is an ugly hack that breaks aliasing rules and would be easily broken, - but it saves almost 4kB of stack. */ - X = (celt_norm*)(out_syn[C-1]+overlap/2); -#else - ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ -#endif - - /* Energy decay */ - decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); - c=0; do - { - for (i=start;irng; - for (c=0;c>20); - } - renormalise_vector(X+boffs, blen, Q15ONE, st->arch); - } - } - st->rng = seed; - - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, - DECODE_BUFFER_SIZE-N+(overlap>>1)); - } while (++cdownsample, 0, st->arch); - } else { - /* Pitch-based PLC */ - const opus_val16 *window; - opus_val16 fade = Q15ONE; - int pitch_index; - VARDECL(opus_val32, etmp); - VARDECL(opus_val16, exc); - - if (loss_count == 0) - { - st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); - } else { - pitch_index = st->last_pitch_index; - fade = QCONST16(.8f,15); - } - - ALLOC(etmp, overlap, opus_val32); - ALLOC(exc, MAX_PERIOD, opus_val16); - window = mode->window; - c=0; do { - opus_val16 decay; - opus_val16 attenuation; - opus_val32 S1=0; - celt_sig *buf; - int extrapolation_offset; - int extrapolation_len; - int exc_length; - int j; - - buf = decode_mem[c]; - for (i=0;iarch); - /* Add a noise floor of -40 dB. */ -#ifdef FIXED_POINT - ac[0] += SHR32(ac[0],13); -#else - ac[0] *= 1.0001f; -#endif - /* Use lag windowing to stabilize the Levinson-Durbin recursion. */ - for (i=1;i<=LPC_ORDER;i++) - { - /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ -#ifdef FIXED_POINT - ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); -#else - ac[i] -= ac[i]*(0.008f*0.008f)*i*i; -#endif - } - _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER); - } - /* We want the excitation for 2 pitch periods in order to look for a - decaying signal, but we can't get more than MAX_PERIOD. */ - exc_length = IMIN(2*pitch_index, MAX_PERIOD); - /* Initialize the LPC history with the samples just before the start - of the region for which we're computing the excitation. */ - { - opus_val16 lpc_mem[LPC_ORDER]; - for (i=0;iarch); - } - - /* Check if the waveform is decaying, and if so how fast. - We do this to avoid adding energy when concealing in a segment - with decaying energy. */ - { - opus_val32 E1=1, E2=1; - int decay_length; -#ifdef FIXED_POINT - int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20); -#endif - decay_length = exc_length>>1; - for (i=0;i= pitch_index) { - j -= pitch_index; - attenuation = MULT16_16_Q15(attenuation, decay); - } - buf[DECODE_BUFFER_SIZE-N+i] = - SHL32(EXTEND32(MULT16_16_Q15(attenuation, - exc[extrapolation_offset+j])), SIG_SHIFT); - /* Compute the energy of the previously decoded signal whose - excitation we're copying. */ - tmp = ROUND16( - buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j], - SIG_SHIFT); - S1 += SHR32(MULT16_16(tmp, tmp), 8); - } - - { - opus_val16 lpc_mem[LPC_ORDER]; - /* Copy the last decoded samples (prior to the overlap region) to - synthesis filter memory so we can have a continuous signal. */ - for (i=0;iarch); - } - - /* Check if the synthesis energy is higher than expected, which can - happen with the signal changes during our window. If so, - attenuate. */ - { - opus_val32 S2=0; - for (i=0;i SHR32(S2,2))) -#else - /* The float test is written this way to catch NaNs in the output - of the IIR filter at the same time. */ - if (!(S1 > 0.2f*S2)) -#endif - { - for (i=0;ipostfilter_period, st->postfilter_period, overlap, - -st->postfilter_gain, -st->postfilter_gain, - st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch); - - /* Simulate TDAC on the concealed audio so that it blends with the - MDCT of the next frame. */ - for (i=0;iloss_count = loss_count+1; - - RESTORE_STACK; -} - -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, - int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) -{ - int c, i, N; - int spread_decision; - opus_int32 bits; - ec_dec _dec; -#ifdef NORM_ALIASING_HACK - celt_norm *X; -#else - VARDECL(celt_norm, X); -#endif - VARDECL(int, fine_quant); - VARDECL(int, pulses); - VARDECL(int, cap); - VARDECL(int, offsets); - VARDECL(int, fine_priority); - VARDECL(int, tf_res); - VARDECL(unsigned char, collapse_masks); - celt_sig *decode_mem[2]; - celt_sig *out_syn[2]; - opus_val16 *lpc; - opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; - - int shortBlocks; - int isTransient; - int intra_ener; - const int CC = st->channels; - int LM, M; - int start; - int end; - int effEnd; - int codedBands; - int alloc_trim; - int postfilter_pitch; - opus_val16 postfilter_gain; - int intensity=0; - int dual_stereo=0; - opus_int32 total_bits; - opus_int32 balance; - opus_int32 tell; - int dynalloc_logp; - int postfilter_tapset; - int anti_collapse_rsv; - int anti_collapse_on=0; - int silence; - int C = st->stream_channels; - const OpusCustomMode *mode; - int nbEBands; - int overlap; - const opus_int16 *eBands; - ALLOC_STACK; - - mode = st->mode; - nbEBands = mode->nbEBands; - overlap = mode->overlap; - eBands = mode->eBands; - start = st->start; - end = st->end; - frame_size *= st->downsample; - - lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); - oldBandE = lpc+CC*LPC_ORDER; - oldLogE = oldBandE + 2*nbEBands; - oldLogE2 = oldLogE + 2*nbEBands; - backgroundLogE = oldLogE2 + 2*nbEBands; - -#ifdef CUSTOM_MODES - if (st->signalling && data!=NULL) - { - int data0=data[0]; - /* Convert "standard mode" to Opus header */ - if (mode->Fs==48000 && mode->shortMdctSize==120) - { - data0 = fromOpus(data0); - if (data0<0) - return OPUS_INVALID_PACKET; - } - st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); - LM = (data0>>3)&0x3; - C = 1 + ((data0>>2)&0x1); - data++; - len--; - if (LM>mode->maxLM) - return OPUS_INVALID_PACKET; - if (frame_size < mode->shortMdctSize<shortMdctSize<maxLM;LM++) - if (mode->shortMdctSize<mode->maxLM) - return OPUS_BAD_ARG; - } - M=1<1275 || pcm==NULL) - return OPUS_BAD_ARG; - - N = M*mode->shortMdctSize; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; - } while (++c mode->effEBands) - effEnd = mode->effEBands; - - if (data == NULL || len<=1) - { - celt_decode_lost(st, N, LM); - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); - RESTORE_STACK; - return frame_size/st->downsample; - } - - /* Check if there are at least two packets received consecutively before - * turning on the pitch-based PLC */ - st->skip_plc = st->loss_count != 0; - - if (dec == NULL) - { - ec_dec_init(&_dec,(unsigned char*)data,len); - dec = &_dec; - } - - if (C==1) - { - for (i=0;i= total_bits) - silence = 1; - else if (tell==1) - silence = ec_dec_bit_logp(dec, 15); - else - silence = 0; - if (silence) - { - /* Pretend we've read all the remaining bits */ - tell = len*8; - dec->nbits_total+=tell-ec_tell(dec); - } - - postfilter_gain = 0; - postfilter_pitch = 0; - postfilter_tapset = 0; - if (start==0 && tell+16 <= total_bits) - { - if(ec_dec_bit_logp(dec, 1)) - { - int qg, octave; - octave = ec_dec_uint(dec, 6); - postfilter_pitch = (16< 0 && tell+3 <= total_bits) - { - isTransient = ec_dec_bit_logp(dec, 3); - tell = ec_tell(dec); - } - else - isTransient = 0; - - if (isTransient) - shortBlocks = M; - else - shortBlocks = 0; - - /* Decode the global flags (first symbols in the stream) */ - intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; - /* Get band energies */ - unquant_coarse_energy(mode, start, end, oldBandE, - intra_ener, dec, C, LM); - - ALLOC(tf_res, nbEBands, int); - tf_decode(start, end, isTransient, tf_res, LM, dec); - - tell = ec_tell(dec); - spread_decision = SPREAD_NORMAL; - if (tell+4 <= total_bits) - spread_decision = ec_dec_icdf(dec, spread_icdf, 5); - - ALLOC(cap, nbEBands, int); - - init_caps(mode,cap,LM,C); - - ALLOC(offsets, nbEBands, int); - - dynalloc_logp = 6; - total_bits<<=BITRES; - tell = ec_tell_frac(dec); - for (i=start;i0) - dynalloc_logp = IMAX(2, dynalloc_logp-1); - } - - ALLOC(fine_quant, nbEBands, int); - alloc_trim = tell+(6<=2&&bits>=((LM+2)<rng, st->arch); - - if (anti_collapse_rsv > 0) - { - anti_collapse_on = ec_dec_bits(dec, 1); - } - - unquant_energy_finalise(mode, start, end, oldBandE, - fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); - - if (anti_collapse_on) - anti_collapse(mode, X, collapse_masks, LM, C, N, - start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch); - - if (silence) - { - for (i=0;idownsample, silence, st->arch); - - c=0; do { - st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); - st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); - comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize, - st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, - mode->window, overlap, st->arch); - if (LM!=0) - comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize, - st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, - mode->window, overlap, st->arch); - - } while (++cpostfilter_period_old = st->postfilter_period; - st->postfilter_gain_old = st->postfilter_gain; - st->postfilter_tapset_old = st->postfilter_tapset; - st->postfilter_period = postfilter_pitch; - st->postfilter_gain = postfilter_gain; - st->postfilter_tapset = postfilter_tapset; - if (LM!=0) - { - st->postfilter_period_old = st->postfilter_period; - st->postfilter_gain_old = st->postfilter_gain; - st->postfilter_tapset_old = st->postfilter_tapset; - } - - if (C==1) - OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); - - /* In case start or end were to change */ - if (!isTransient) - { - opus_val16 max_background_increase; - OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); - OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); - /* In normal circumstances, we only allow the noise floor to increase by - up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB - increase for each update.*/ - if (st->loss_count < 10) - max_background_increase = M*QCONST16(0.001f,DB_SHIFT); - else - max_background_increase = QCONST16(1.f,DB_SHIFT); - for (i=0;i<2*nbEBands;i++) - backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); - } else { - for (i=0;i<2*nbEBands;i++) - oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); - } - c=0; do - { - for (i=0;irng = dec->rng; - - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); - st->loss_count = 0; - RESTORE_STACK; - if (ec_tell(dec) > 8*len) - return OPUS_INTERNAL_ERROR; - if(ec_get_error(dec)) - st->error = 1; - return frame_size/st->downsample; -} - - -#ifdef CUSTOM_MODES - -#ifdef FIXED_POINT -int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) -{ - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); -} - -#ifndef DISABLE_FLOAT_API -int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) -{ - int j, ret, C, N; - VARDECL(opus_int16, out); - ALLOC_STACK; - - if (pcm==NULL) - return OPUS_BAD_ARG; - - C = st->channels; - N = frame_size; - - ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); - if (ret>0) - for (j=0;jchannels; - N = frame_size; - ALLOC(out, C*N, celt_sig); - - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); - - if (ret>0) - for (j=0;j=st->mode->nbEBands) - goto bad_arg; - st->start = value; - } - break; - case CELT_SET_END_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>st->mode->nbEBands) - goto bad_arg; - st->end = value; - } - break; - case CELT_SET_CHANNELS_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>2) - goto bad_arg; - st->stream_channels = value; - } - break; - case CELT_GET_AND_CLEAR_ERROR_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) - goto bad_arg; - *value=st->error; - st->error = 0; - } - break; - case OPUS_GET_LOOKAHEAD_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) - goto bad_arg; - *value = st->overlap/st->downsample; - } - break; - case OPUS_RESET_STATE: - { - int i; - opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2; - lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels); - oldBandE = lpc+st->channels*LPC_ORDER; - oldLogE = oldBandE + 2*st->mode->nbEBands; - oldLogE2 = oldLogE + 2*st->mode->nbEBands; - OPUS_CLEAR((char*)&st->DECODER_RESET_START, - opus_custom_decoder_get_size(st->mode, st->channels)- - ((char*)&st->DECODER_RESET_START - (char*)st)); - for (i=0;i<2*st->mode->nbEBands;i++) - oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); - st->skip_plc = 1; - } - break; - case OPUS_GET_PITCH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) - goto bad_arg; - *value = st->postfilter_period; - } - break; - case CELT_GET_MODE_REQUEST: - { - const CELTMode ** value = va_arg(ap, const CELTMode**); - if (value==0) - goto bad_arg; - *value=st->mode; - } - break; - case CELT_SET_SIGNALLING_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->signalling = value; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 * value = va_arg(ap, opus_uint32 *); - if (value==0) - goto bad_arg; - *value=st->rng; - } - break; - default: - goto bad_request; - } - va_end(ap); - return OPUS_OK; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -bad_request: - va_end(ap); - return OPUS_UNIMPLEMENTED; -} diff --git a/thirdparty/opus/celt/celt_encoder.c b/thirdparty/opus/celt/celt_encoder.c deleted file mode 100644 index 3ee7a4d3f7..0000000000 --- a/thirdparty/opus/celt/celt_encoder.c +++ /dev/null @@ -1,2410 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_ENCODER_C - -#include "cpu_support.h" -#include "os_support.h" -#include "mdct.h" -#include -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include -#include "celt_lpc.h" -#include "vq.h" - - -/** Encoder state - @brief Encoder state - */ -struct OpusCustomEncoder { - const OpusCustomMode *mode; /**< Mode used by the encoder */ - int channels; - int stream_channels; - - int force_intra; - int clip; - int disable_pf; - int complexity; - int upsample; - int start, end; - - opus_int32 bitrate; - int vbr; - int signalling; - int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ - int loss_rate; - int lsb_depth; - int variable_duration; - int lfe; - int arch; - - /* Everything beyond this point gets cleared on a reset */ -#define ENCODER_RESET_START rng - - opus_uint32 rng; - int spread_decision; - opus_val32 delayedIntra; - int tonal_average; - int lastCodedBands; - int hf_average; - int tapset_decision; - - int prefilter_period; - opus_val16 prefilter_gain; - int prefilter_tapset; -#ifdef RESYNTH - int prefilter_period_old; - opus_val16 prefilter_gain_old; - int prefilter_tapset_old; -#endif - int consec_transient; - AnalysisInfo analysis; - - opus_val32 preemph_memE[2]; - opus_val32 preemph_memD[2]; - - /* VBR-related parameters */ - opus_int32 vbr_reservoir; - opus_int32 vbr_drift; - opus_int32 vbr_offset; - opus_int32 vbr_count; - opus_val32 overlap_max; - opus_val16 stereo_saving; - int intensity; - opus_val16 *energy_mask; - opus_val16 spec_avg; - -#ifdef RESYNTH - /* +MAX_PERIOD/2 to make space for overlap */ - celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2]; -#endif - - celt_sig in_mem[1]; /* Size = channels*mode->overlap */ - /* celt_sig prefilter_mem[], Size = channels*COMBFILTER_MAXPERIOD */ - /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */ - /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */ - /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */ -}; - -int celt_encoder_get_size(int channels) -{ - CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); - return opus_custom_encoder_get_size(mode, channels); -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels) -{ - int size = sizeof(struct CELTEncoder) - + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ - + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */ - + 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ - /* opus_val16 oldLogE[channels*mode->nbEBands]; */ - /* opus_val16 oldLogE2[channels*mode->nbEBands]; */ - return size; -} - -#ifdef CUSTOM_MODES -CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error) -{ - int ret; - CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels)); - /* init will handle the NULL case */ - ret = opus_custom_encoder_init(st, mode, channels); - if (ret != OPUS_OK) - { - opus_custom_encoder_destroy(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} -#endif /* CUSTOM_MODES */ - -static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, - int channels, int arch) -{ - if (channels < 0 || channels > 2) - return OPUS_BAD_ARG; - - if (st==NULL || mode==NULL) - return OPUS_ALLOC_FAIL; - - OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); - - st->mode = mode; - st->stream_channels = st->channels = channels; - - st->upsample = 1; - st->start = 0; - st->end = st->mode->effEBands; - st->signalling = 1; - - st->arch = arch; - - st->constrained_vbr = 1; - st->clip = 1; - - st->bitrate = OPUS_BITRATE_MAX; - st->vbr = 0; - st->force_intra = 0; - st->complexity = 5; - st->lsb_depth=24; - - opus_custom_encoder_ctl(st, OPUS_RESET_STATE); - - return OPUS_OK; -} - -#ifdef CUSTOM_MODES -int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) -{ - return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); -} -#endif - -int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, - int arch) -{ - int ret; - ret = opus_custom_encoder_init_arch(st, - opus_custom_mode_create(48000, 960, NULL), channels, arch); - if (ret != OPUS_OK) - return ret; - st->upsample = resampling_factor(sampling_rate); - return OPUS_OK; -} - -#ifdef CUSTOM_MODES -void opus_custom_encoder_destroy(CELTEncoder *st) -{ - opus_free(st); -} -#endif /* CUSTOM_MODES */ - - -static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, - opus_val16 *tf_estimate, int *tf_chan) -{ - int i; - VARDECL(opus_val16, tmp); - opus_val32 mem0,mem1; - int is_transient = 0; - opus_int32 mask_metric = 0; - int c; - opus_val16 tf_max; - int len2; - /* Table of 6*64/x, trained on real data to minimize the average error */ - static const unsigned char inv_table[128] = { - 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, - 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, - 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, - 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, - }; - SAVE_STACK; - ALLOC(tmp, len, opus_val16); - - len2=len/2; - for (c=0;c=0;i--) - { -#ifdef FIXED_POINT - /* FIXME: Use PSHR16() instead */ - tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3); -#else - tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0); -#endif - mem0 = tmp[i]; - maxE = MAX16(maxE, mem0); - } - /*for (i=0;i>1))); -#else - mean = celt_sqrt(mean * maxE*.5*len2); -#endif - /* Inverse of the mean energy in Q15+6 */ - norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1)); - /* Compute harmonic mean discarding the unreliable boundaries - The data is smooth, so we only take 1/4th of the samples */ - unmask=0; - for (i=12;imask_metric) - { - *tf_chan = c; - mask_metric = unmask; - } - } - is_transient = mask_metric>200; - - /* Arbitrary metric for VBR boost */ - tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); - /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ - *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); - /*printf("%d %f\n", tf_max, mask_metric);*/ - RESTORE_STACK; -#ifdef FUZZING - is_transient = rand()&0x1; -#endif - /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/ - return is_transient; -} - -/* Looks for sudden increases of energy to decide whether we need to patch - the transient decision */ -static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, - int start, int end, int C) -{ - int i, c; - opus_val32 mean_diff=0; - opus_val16 spread_old[26]; - /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to - avoid false detection caused by irrelevant bands */ - if (C==1) - { - spread_old[start] = oldE[start]; - for (i=start+1;i=start;i--) - spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); - /* Compute mean increase */ - c=0; do { - for (i=IMAX(2,start);i QCONST16(1.f, DB_SHIFT); -} - -/** Apply window and compute the MDCT for all sub-frames and - all channels in a frame */ -static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, - int arch) -{ - const int overlap = mode->overlap; - int N; - int B; - int shift; - int i, b, c; - if (shortBlocks) - { - B = shortBlocks; - N = mode->shortMdctSize; - shift = mode->maxLM; - } else { - B = 1; - N = mode->shortMdctSize<maxLM-LM; - } - c=0; do { - for (b=0;bmdct, in+c*(B*N+overlap)+b*N, - &out[b+c*N*B], mode->window, overlap, shift, B, - arch); - } - } while (++ceBands[len]-m->eBands[len-1])<eBands[len]-m->eBands[len-1])<eBands[i+1]-m->eBands[i])<eBands[i+1]-m->eBands[i])==1; - OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<eBands[i]<>LM, 1<>k, 1<=0;i--) - { - if (tf_res[i+1] == 1) - tf_res[i] = path1[i+1]; - else - tf_res[i] = path0[i+1]; - } - /*printf("%d %f\n", *tf_sum, tf_estimate);*/ - RESTORE_STACK; -#ifdef FUZZING - tf_select = rand()&0x1; - tf_res[0] = rand()&0x1; - for (i=1;istorage*8; - tell = ec_tell(enc); - logp = isTransient ? 2 : 4; - /* Reserve space to code the tf_select decision. */ - tf_select_rsv = LM>0 && tell+logp+1 <= budget; - budget -= tf_select_rsv; - curr = tf_changed = 0; - for (i=start;ieBands[i]<eBands[i]<eBands[i+1]-m->eBands[i])<eBands[i]<eBands[i]<eBands[i+1]-m->eBands[i])<nbEBands]*(opus_int32)(2+2*i-end); - } - } while (++cvalid) - { - trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), - (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); - } -#else - (void)analysis; -#endif - -#ifdef FIXED_POINT - trim_index = PSHR32(trim, 8); -#else - trim_index = (int)floor(.5f+trim); -#endif - trim_index = IMAX(0, IMIN(10, trim_index)); - /*printf("%d\n", trim_index);*/ -#ifdef FUZZING - trim_index = rand()%11; -#endif - return trim_index; -} - -static int stereo_analysis(const CELTMode *m, const celt_norm *X, - int LM, int N0) -{ - int i; - int thetas; - opus_val32 sumLR = EPSILON, sumMS = EPSILON; - - /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */ - for (i=0;i<13;i++) - { - int j; - for (j=m->eBands[i]<eBands[i+1]<eBands[13]<<(LM+1))+thetas, sumMS) - > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); -} - -#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0) -static opus_val16 median_of_5(const opus_val16 *x) -{ - opus_val16 t0, t1, t2, t3, t4; - t2 = x[2]; - if (x[0] > x[1]) - { - t0 = x[1]; - t1 = x[0]; - } else { - t0 = x[0]; - t1 = x[1]; - } - if (x[3] > x[4]) - { - t3 = x[4]; - t4 = x[3]; - } else { - t3 = x[3]; - t4 = x[4]; - } - if (t0 > t3) - { - MSWAP(t0, t3); - MSWAP(t1, t4); - } - if (t2 > t1) - { - if (t1 < t3) - return MIN16(t2, t3); - else - return MIN16(t4, t1); - } else { - if (t2 < t3) - return MIN16(t1, t3); - else - return MIN16(t2, t4); - } -} - -static opus_val16 median_of_3(const opus_val16 *x) -{ - opus_val16 t0, t1, t2; - if (x[0] > x[1]) - { - t0 = x[1]; - t1 = x[0]; - } else { - t0 = x[0]; - t1 = x[1]; - } - t2 = x[2]; - if (t1 < t2) - return t1; - else if (t0 < t2) - return t2; - else - return t0; -} - -static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, - int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, - int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) -{ - int i, c; - opus_int32 tot_boost=0; - opus_val16 maxDepth; - VARDECL(opus_val16, follower); - VARDECL(opus_val16, noise_floor); - SAVE_STACK; - ALLOC(follower, C*nbEBands, opus_val16); - ALLOC(noise_floor, C*nbEBands, opus_val16); - OPUS_CLEAR(offsets, nbEBands); - /* Dynamic allocation code */ - maxDepth=-QCONST16(31.9f, DB_SHIFT); - for (i=0;i 50 && LM>=1 && !lfe) - { - int last=0; - c=0;do - { - opus_val16 offset; - opus_val16 tmp; - opus_val16 *f; - f = &follower[c*nbEBands]; - f[0] = bandLogE2[c*nbEBands]; - for (i=1;i bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) - last=i; - f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); - } - for (i=last-1;i>=0;i--) - f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); - - /* Combine with a median filter to avoid dynalloc triggering unnecessarily. - The "offset" value controls how conservative we are -- a higher offset - reduces the impact of the median filter and makes dynalloc use more bits. */ - offset = QCONST16(1.f, DB_SHIFT); - for (i=2;i=12) - follower[i] = HALF16(follower[i]); - follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); - - width = C*(eBands[i+1]-eBands[i])< 48) { - boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT); - boost_bits = (boost*width<>BITRES>>3 > effectiveBytes/4) - { - opus_int32 cap = ((effectiveBytes/4)<mode; - overlap = mode->overlap; - ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); - - pre[0] = _pre; - pre[1] = _pre + (N+COMBFILTER_MAXPERIOD); - - - c=0; do { - OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); - OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N); - } while (++c>1, opus_val16); - - pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch); - /* Don't search for the fir last 1.5 octave of the range because - there's too many false-positives due to short-term correlation */ - pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, - COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index, - st->arch); - pitch_index = COMBFILTER_MAXPERIOD-pitch_index; - - gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, - N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch); - if (pitch_index > COMBFILTER_MAXPERIOD-2) - pitch_index = COMBFILTER_MAXPERIOD-2; - gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); - /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/ - if (st->loss_rate>2) - gain1 = HALF32(gain1); - if (st->loss_rate>4) - gain1 = HALF32(gain1); - if (st->loss_rate>8) - gain1 = 0; - } else { - gain1 = 0; - pitch_index = COMBFILTER_MINPERIOD; - } - - /* Gain threshold for enabling the prefilter/postfilter */ - pf_threshold = QCONST16(.2f,15); - - /* Adjusting the threshold based on rate and continuity */ - if (abs(pitch_index-st->prefilter_period)*10>pitch_index) - pf_threshold += QCONST16(.2f,15); - if (nbAvailableBytes<25) - pf_threshold += QCONST16(.1f,15); - if (nbAvailableBytes<35) - pf_threshold += QCONST16(.1f,15); - if (st->prefilter_gain > QCONST16(.4f,15)) - pf_threshold -= QCONST16(.1f,15); - if (st->prefilter_gain > QCONST16(.55f,15)) - pf_threshold -= QCONST16(.1f,15); - - /* Hard threshold at 0.2 */ - pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15)); - if (gain1prefilter_gain)prefilter_gain; - -#ifdef FIXED_POINT - qg = ((gain1+1536)>>10)/3-1; -#else - qg = (int)floor(.5f+gain1*32/3)-1; -#endif - qg = IMAX(0, IMIN(7, qg)); - gain1 = QCONST16(0.09375f,15)*(qg+1); - pf_on = 1; - } - /*printf("%d %f\n", pitch_index, gain1);*/ - - c=0; do { - int offset = mode->shortMdctSize-overlap; - st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap); - if (offset) - comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD, - st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, - st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch); - - comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, - st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, - st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch); - OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap); - - if (N>COMBFILTER_MAXPERIOD) - { - OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); - } else { - OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); - OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); - } - } while (++cnbEBands; - eBands = mode->eBands; - - coded_bands = lastCodedBands ? lastCodedBands : nbEBands; - coded_bins = eBands[coded_bands]<analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ -#ifndef DISABLE_FLOAT_API - if (analysis->valid && analysis->activity<.4) - target -= (opus_int32)((coded_bins<activity)); -#endif - /* Stereo savings */ - if (C==2) - { - int coded_stereo_bands; - int coded_stereo_dof; - opus_val16 max_frac; - coded_stereo_bands = IMIN(intensity, coded_bands); - coded_stereo_dof = (eBands[coded_stereo_bands]<valid && !lfe) - { - opus_int32 tonal_target; - float tonal; - - /* Tonality boost (compensating for the average). */ - tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f; - tonal_target = target + (opus_int32)((coded_bins<tonality, tonal);*/ - target = tonal_target; - } -#else - (void)analysis; - (void)pitch_change; -#endif - - if (has_surround_mask&&!lfe) - { - opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<end, st->intensity, surround_target, target, st->bitrate);*/ - target = IMAX(target/4, surround_target); - } - - { - opus_int32 floor_depth; - int bins; - bins = eBands[nbEBands-2]<>2); - target = IMIN(target, floor_depth); - /*printf("%f %d\n", maxDepth, floor_depth);*/ - } - - if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) - { - opus_val16 rate_factor = Q15ONE; - if (bitrate < 64000) - { -#ifdef FIXED_POINT - rate_factor = MAX16(0,(bitrate-32000)); -#else - rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); -#endif - } - if (constrained_vbr) - rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); - target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); - - } - - if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14)) - { - opus_val16 amount; - opus_val16 tvbr_factor; - amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate))); - tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT); - target += (opus_int32)MULT16_32_Q15(tvbr_factor, target); - } - - /* Don't allow more than doubling the rate */ - target = IMIN(2*base_target, target); - - return target; -} - -int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc) -{ - int i, c, N; - opus_int32 bits; - ec_enc _enc; - VARDECL(celt_sig, in); - VARDECL(celt_sig, freq); - VARDECL(celt_norm, X); - VARDECL(celt_ener, bandE); - VARDECL(opus_val16, bandLogE); - VARDECL(opus_val16, bandLogE2); - VARDECL(int, fine_quant); - VARDECL(opus_val16, error); - VARDECL(int, pulses); - VARDECL(int, cap); - VARDECL(int, offsets); - VARDECL(int, fine_priority); - VARDECL(int, tf_res); - VARDECL(unsigned char, collapse_masks); - celt_sig *prefilter_mem; - opus_val16 *oldBandE, *oldLogE, *oldLogE2; - int shortBlocks=0; - int isTransient=0; - const int CC = st->channels; - const int C = st->stream_channels; - int LM, M; - int tf_select; - int nbFilledBytes, nbAvailableBytes; - int start; - int end; - int effEnd; - int codedBands; - int tf_sum; - int alloc_trim; - int pitch_index=COMBFILTER_MINPERIOD; - opus_val16 gain1 = 0; - int dual_stereo=0; - int effectiveBytes; - int dynalloc_logp; - opus_int32 vbr_rate; - opus_int32 total_bits; - opus_int32 total_boost; - opus_int32 balance; - opus_int32 tell; - int prefilter_tapset=0; - int pf_on; - int anti_collapse_rsv; - int anti_collapse_on=0; - int silence=0; - int tf_chan = 0; - opus_val16 tf_estimate; - int pitch_change=0; - opus_int32 tot_boost; - opus_val32 sample_max; - opus_val16 maxDepth; - const OpusCustomMode *mode; - int nbEBands; - int overlap; - const opus_int16 *eBands; - int secondMdct; - int signalBandwidth; - int transient_got_disabled=0; - opus_val16 surround_masking=0; - opus_val16 temporal_vbr=0; - opus_val16 surround_trim = 0; - opus_int32 equiv_rate = 510000; - VARDECL(opus_val16, surround_dynalloc); - ALLOC_STACK; - - mode = st->mode; - nbEBands = mode->nbEBands; - overlap = mode->overlap; - eBands = mode->eBands; - start = st->start; - end = st->end; - tf_estimate = 0; - if (nbCompressedBytes<2 || pcm==NULL) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - frame_size *= st->upsample; - for (LM=0;LM<=mode->maxLM;LM++) - if (mode->shortMdctSize<mode->maxLM) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - M=1<shortMdctSize; - - prefilter_mem = st->in_mem+CC*(overlap); - oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); - oldLogE = oldBandE + CC*nbEBands; - oldLogE2 = oldLogE + CC*nbEBands; - - if (enc==NULL) - { - tell=1; - nbFilledBytes=0; - } else { - tell=ec_tell(enc); - nbFilledBytes=(tell+4)>>3; - } - -#ifdef CUSTOM_MODES - if (st->signalling && enc==NULL) - { - int tmp = (mode->effEBands-end)>>1; - end = st->end = IMAX(1, mode->effEBands-tmp); - compressed[0] = tmp<<5; - compressed[0] |= LM<<3; - compressed[0] |= (C==2)<<2; - /* Convert "standard mode" to Opus header */ - if (mode->Fs==48000 && mode->shortMdctSize==120) - { - int c0 = toOpus(compressed[0]); - if (c0<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - compressed[0] = c0; - } - compressed++; - nbCompressedBytes--; - } -#else - celt_assert(st->signalling==0); -#endif - - /* Can't produce more than 1275 output bytes */ - nbCompressedBytes = IMIN(nbCompressedBytes,1275); - nbAvailableBytes = nbCompressedBytes - nbFilledBytes; - - if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX) - { - opus_int32 den=mode->Fs>>BITRES; - vbr_rate=(st->bitrate*frame_size+(den>>1))/den; -#ifdef CUSTOM_MODES - if (st->signalling) - vbr_rate -= 8<>(3+BITRES); - } else { - opus_int32 tmp; - vbr_rate = 0; - tmp = st->bitrate*frame_size; - if (tell>1) - tmp += tell; - if (st->bitrate!=OPUS_BITRATE_MAX) - nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, - (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); - effectiveBytes = nbCompressedBytes; - } - if (st->bitrate != OPUS_BITRATE_MAX) - equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50); - - if (enc==NULL) - { - ec_enc_init(&_enc, compressed, nbCompressedBytes); - enc = &_enc; - } - - if (vbr_rate>0) - { - /* Computes the max bit-rate allowed in VBR mode to avoid violating the - target rate and buffering. - We must do this up front so that bust-prevention logic triggers - correctly if we don't have enough bits. */ - if (st->constrained_vbr) - { - opus_int32 vbr_bound; - opus_int32 max_allowed; - /* We could use any multiple of vbr_rate as bound (depending on the - delay). - This is clamped to ensure we use at least two bytes if the encoder - was entirely empty, but to allow 0 in hybrid mode. */ - vbr_bound = vbr_rate; - max_allowed = IMIN(IMAX(tell==1?2:0, - (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)), - nbAvailableBytes); - if(max_allowed < nbAvailableBytes) - { - nbCompressedBytes = nbFilledBytes+max_allowed; - nbAvailableBytes = max_allowed; - ec_enc_shrink(enc, nbCompressedBytes); - } - } - } - total_bits = nbCompressedBytes*8; - - effEnd = end; - if (effEnd > mode->effEBands) - effEnd = mode->effEBands; - - ALLOC(in, CC*(N+overlap), celt_sig); - - sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); - st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); - sample_max=MAX32(sample_max, st->overlap_max); -#ifdef FIXED_POINT - silence = (sample_max==0); -#else - silence = (sample_max <= (opus_val16)1/(1<lsb_depth)); -#endif -#ifdef FUZZING - if ((rand()&0x3F)==0) - silence = 1; -#endif - if (tell==1) - ec_enc_bit_logp(enc, silence, 15); - else - silence=0; - if (silence) - { - /*In VBR mode there is no need to send more than the minimum. */ - if (vbr_rate>0) - { - effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2); - total_bits=nbCompressedBytes*8; - nbAvailableBytes=2; - ec_enc_shrink(enc, nbCompressedBytes); - } - /* Pretend we've filled all the remaining bits with zeros - (that's what the initialiser did anyway) */ - tell = nbCompressedBytes*8; - enc->nbits_total+=tell-ec_tell(enc); - } - c=0; do { - int need_clip=0; -#ifndef FIXED_POINT - need_clip = st->clip && sample_max>65536.f; -#endif - celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, - mode->preemph, st->preemph_memE+c, need_clip); - } while (++clfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf - && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); - - prefilter_tapset = st->tapset_decision; - pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); - if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) - && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) - pitch_change = 1; - if (pf_on==0) - { - if(start==0 && tell+16<=total_bits) - ec_enc_bit_logp(enc, 0, 1); - } else { - /*This block is not gated by a total bits check only because - of the nbAvailableBytes check above.*/ - int octave; - ec_enc_bit_logp(enc, 1, 1); - pitch_index += 1; - octave = EC_ILOG(pitch_index)-5; - ec_enc_uint(enc, octave, 6); - ec_enc_bits(enc, pitch_index-(16<complexity >= 1 && !st->lfe) - { - isTransient = transient_analysis(in, N+overlap, CC, - &tf_estimate, &tf_chan); - } - if (LM>0 && ec_tell(enc)+3<=total_bits) - { - if (isTransient) - shortBlocks = M; - } else { - isTransient = 0; - transient_got_disabled=1; - } - - ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ - ALLOC(bandE,nbEBands*CC, celt_ener); - ALLOC(bandLogE,nbEBands*CC, opus_val16); - - secondMdct = shortBlocks && st->complexity>=8; - ALLOC(bandLogE2, C*nbEBands, opus_val16); - if (secondMdct) - { - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); - compute_band_energies(mode, freq, bandE, effEnd, C, LM); - amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); - for (i=0;iupsample, st->arch); - if (CC==2&&C==1) - tf_chan = 0; - compute_band_energies(mode, freq, bandE, effEnd, C, LM); - - if (st->lfe) - { - for (i=2;ienergy_mask&&!st->lfe) - { - int mask_end; - int midband; - int count_dynalloc; - opus_val32 mask_avg=0; - opus_val32 diff=0; - int count=0; - mask_end = IMAX(2,st->lastCodedBands); - for (c=0;cenergy_mask[nbEBands*c+i], - QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); - if (mask > 0) - mask = HALF16(mask); - mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); - count += eBands[i+1]-eBands[i]; - diff += MULT16_16(mask, 1+2*i-mask_end); - } - } - celt_assert(count>0); - mask_avg = DIV32_16(mask_avg,count); - mask_avg += QCONST16(.2f, DB_SHIFT); - diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); - /* Again, being conservative */ - diff = HALF32(diff); - diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); - /* Find the band that's in the middle of the coded spectrum */ - for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); - count_dynalloc=0; - for(i=0;ienergy_mask[i], st->energy_mask[nbEBands+i]); - else - unmask = st->energy_mask[i]; - unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT)); - unmask -= lin; - if (unmask > QCONST16(.25f, DB_SHIFT)) - { - surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); - count_dynalloc++; - } - } - if (count_dynalloc>=3) - { - /* If we need dynalloc in many bands, it's probably because our - initial masking rate was too low. */ - mask_avg += QCONST16(.25f, DB_SHIFT); - if (mask_avg>0) - { - /* Something went really wrong in the original calculations, - disabling masking. */ - mask_avg = 0; - diff = 0; - OPUS_CLEAR(surround_dynalloc, mask_end); - } else { - for(i=0;ilfe) - { - opus_val16 follow=-QCONST16(10.0f,DB_SHIFT); - opus_val32 frame_avg=0; - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; - for(i=start;ispec_avg); - temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); - st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); - } - /*for (i=0;i<21;i++) - printf("%f ", bandLogE[i]); - printf("\n");*/ - - if (!secondMdct) - { - OPUS_COPY(bandLogE2, bandLogE, C*nbEBands); - } - - /* Last chance to catch any transient we might have missed in the - time-domain analysis */ - if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) - { - if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) - { - isTransient = 1; - shortBlocks = M; - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); - compute_band_energies(mode, freq, bandE, effEnd, C, LM); - amp2Log2(mode, effEnd, end, bandE, bandLogE, C); - /* Compensate for the scaling of short vs long mdcts */ - for (i=0;i0 && ec_tell(enc)+3<=total_bits) - ec_enc_bit_logp(enc, isTransient, 3); - - ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ - - /* Band normalisation */ - normalise_bands(mode, freq, X, bandE, effEnd, C, M); - - ALLOC(tf_res, nbEBands, int); - /* Disable variable tf resolution for hybrid and at very low bitrate */ - if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) - { - int lambda; - if (effectiveBytes<40) - lambda = 12; - else if (effectiveBytes<60) - lambda = 6; - else if (effectiveBytes<100) - lambda = 4; - else - lambda = 3; - lambda*=2; - tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); - for (i=effEnd;iforce_intra, - &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); - - tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); - - if (ec_tell(enc)+4<=total_bits) - { - if (st->lfe) - { - st->tapset_decision = 0; - st->spread_decision = SPREAD_NORMAL; - } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) - { - if (st->complexity == 0) - st->spread_decision = SPREAD_NONE; - else - st->spread_decision = SPREAD_NORMAL; - } else { - /* Disable new spreading+tapset estimator until we can show it works - better than the old one. So far it seems like spreading_decision() - works best. */ -#if 0 - if (st->analysis.valid) - { - static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; - static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; - static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; - static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; - st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); - st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); - } else -#endif - { - st->spread_decision = spreading_decision(mode, X, - &st->tonal_average, st->spread_decision, &st->hf_average, - &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); - } - /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ - /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ - } - ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); - } - - ALLOC(offsets, nbEBands, int); - - maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, - st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); - /* For LFE, everything interesting is in the first band */ - if (st->lfe) - offsets[0] = IMIN(8, effectiveBytes/3); - ALLOC(cap, nbEBands, int); - init_caps(mode,cap,LM,C); - - dynalloc_logp = 6; - total_bits<<=BITRES; - total_boost = 0; - tell = ec_tell_frac(enc); - for (i=start;iintensity = hysteresis_decision((opus_val16)(equiv_rate/1000), - intensity_thresholds, intensity_histeresis, 21, st->intensity); - st->intensity = IMIN(end,IMAX(start, st->intensity)); - } - - alloc_trim = 5; - if (tell+(6<lfe) - alloc_trim = 5; - else - alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, - st->intensity, surround_trim, st->arch); - ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); - tell = ec_tell_frac(enc); - } - - /* Variable bitrate */ - if (vbr_rate>0) - { - opus_val16 alpha; - opus_int32 delta; - /* The target rate in 8th bits per frame */ - opus_int32 target, base_target; - opus_int32 min_allowed; - int lm_diff = mode->maxLM - LM; - - /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. - The CELT allocator will just not be able to use more than that anyway. */ - nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); - base_target = vbr_rate - ((40*C+20)<constrained_vbr) - base_target += (st->vbr_offset>>lm_diff); - - target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, - st->lastCodedBands, C, st->intensity, st->constrained_vbr, - st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, - st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, - temporal_vbr); - - /* The current offset is removed from the target and the space used - so far is added*/ - target=target+tell; - /* In VBR mode the frame size must not be reduced so much that it would - result in the encoder running out of bits. - The margin of 2 bytes ensures that none of the bust-prevention logic - in the decoder will have triggered so far. */ - min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes; - - nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); - nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); - nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes; - - /* By how much did we "miss" the target on that frame */ - delta = target - vbr_rate; - - target=nbAvailableBytes<<(BITRES+3); - - /*If the frame is silent we don't adjust our drift, otherwise - the encoder will shoot to very high rates after hitting a - span of silence, but we do allow the bitres to refill. - This means that we'll undershoot our target in CVBR/VBR modes - on files with lots of silence. */ - if(silence) - { - nbAvailableBytes = 2; - target = 2*8<vbr_count < 970) - { - st->vbr_count++; - alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16)); - } else - alpha = QCONST16(.001f,15); - /* How many bits have we used in excess of what we're allowed */ - if (st->constrained_vbr) - st->vbr_reservoir += target - vbr_rate; - /*printf ("%d\n", st->vbr_reservoir);*/ - - /* Compute the offset we need to apply in order to reach the target */ - if (st->constrained_vbr) - { - st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<vbr_offset-st->vbr_drift); - st->vbr_offset = -st->vbr_drift; - } - /*printf ("%d\n", st->vbr_drift);*/ - - if (st->constrained_vbr && st->vbr_reservoir < 0) - { - /* We're under the min value -- increase rate */ - int adjust = (-st->vbr_reservoir)/(8<vbr_reservoir = 0; - /*printf ("+%d\n", adjust);*/ - } - nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); - /*printf("%d\n", nbCompressedBytes*50*8);*/ - /* This moves the raw bits to take into account the new compressed size */ - ec_enc_shrink(enc, nbCompressedBytes); - } - - /* Bit allocation */ - ALLOC(fine_quant, nbEBands, int); - ALLOC(pulses, nbEBands, int); - ALLOC(fine_priority, nbEBands, int); - - /* bits = packet size - where we are - safety*/ - bits = (((opus_int32)nbCompressedBytes*8)<=2&&bits>=((LM+2)<analysis.valid) - { - int min_bandwidth; - if (equiv_rate < (opus_int32)32000*C) - min_bandwidth = 13; - else if (equiv_rate < (opus_int32)48000*C) - min_bandwidth = 16; - else if (equiv_rate < (opus_int32)60000*C) - min_bandwidth = 18; - else if (equiv_rate < (opus_int32)80000*C) - min_bandwidth = 19; - else - min_bandwidth = 20; - signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth); - } -#endif - if (st->lfe) - signalBandwidth = 1; - codedBands = compute_allocation(mode, start, end, offsets, cap, - alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, - fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); - if (st->lastCodedBands) - st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); - else - st->lastCodedBands = codedBands; - - quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C); - - /* Residual quantisation */ - ALLOC(collapse_masks, C*nbEBands, unsigned char); - quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, - bandE, pulses, shortBlocks, st->spread_decision, - dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<rng, st->arch); - - if (anti_collapse_rsv > 0) - { - anti_collapse_on = st->consec_transient<2; -#ifdef FUZZING - anti_collapse_on = rand()&0x1; -#endif - ec_enc_bits(enc, anti_collapse_on, 1); - } - quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); - - if (silence) - { - for (i=0;irng); - } - - c=0; do { - OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); - } while (++csyn_mem[c]+2*MAX_PERIOD-N; - } while (++cupsample, silence, st->arch); - - c=0; do { - st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); - comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, - st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, - mode->window, overlap); - if (LM!=0) - comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, - st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, - mode->window, overlap); - } while (++cupsample, mode->preemph, st->preemph_memD); - st->prefilter_period_old = st->prefilter_period; - st->prefilter_gain_old = st->prefilter_gain; - st->prefilter_tapset_old = st->prefilter_tapset; - } -#endif - - st->prefilter_period = pitch_index; - st->prefilter_gain = gain1; - st->prefilter_tapset = prefilter_tapset; -#ifdef RESYNTH - if (LM!=0) - { - st->prefilter_period_old = st->prefilter_period; - st->prefilter_gain_old = st->prefilter_gain; - st->prefilter_tapset_old = st->prefilter_tapset; - } -#endif - - if (CC==2&&C==1) { - OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); - } - - if (!isTransient) - { - OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands); - OPUS_COPY(oldLogE, oldBandE, CC*nbEBands); - } else { - for (i=0;iconsec_transient++; - else - st->consec_transient=0; - st->rng = enc->rng; - - /* If there's any room left (can only happen for very high rates), - it's already filled with zeros */ - ec_enc_done(enc); - -#ifdef CUSTOM_MODES - if (st->signalling) - nbCompressedBytes++; -#endif - - RESTORE_STACK; - if (ec_get_error(enc)) - return OPUS_INTERNAL_ERROR; - else - return nbCompressedBytes; -} - - -#ifdef CUSTOM_MODES - -#ifdef FIXED_POINT -int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) -{ - return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); -} - -#ifndef DISABLE_FLOAT_API -int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) -{ - int j, ret, C, N; - VARDECL(opus_int16, in); - ALLOC_STACK; - - if (pcm==NULL) - return OPUS_BAD_ARG; - - C = st->channels; - N = frame_size; - ALLOC(in, C*N, opus_int16); - - for (j=0;jchannels; - N=frame_size; - ALLOC(in, C*N, celt_sig); - for (j=0;j10) - goto bad_arg; - st->complexity = value; - } - break; - case CELT_SET_START_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<0 || value>=st->mode->nbEBands) - goto bad_arg; - st->start = value; - } - break; - case CELT_SET_END_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>st->mode->nbEBands) - goto bad_arg; - st->end = value; - } - break; - case CELT_SET_PREDICTION_REQUEST: - { - int value = va_arg(ap, opus_int32); - if (value<0 || value>2) - goto bad_arg; - st->disable_pf = value<=1; - st->force_intra = value==0; - } - break; - case OPUS_SET_PACKET_LOSS_PERC_REQUEST: - { - int value = va_arg(ap, opus_int32); - if (value<0 || value>100) - goto bad_arg; - st->loss_rate = value; - } - break; - case OPUS_SET_VBR_CONSTRAINT_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->constrained_vbr = value; - } - break; - case OPUS_SET_VBR_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->vbr = value; - } - break; - case OPUS_SET_BITRATE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<=500 && value!=OPUS_BITRATE_MAX) - goto bad_arg; - value = IMIN(value, 260000*st->channels); - st->bitrate = value; - } - break; - case CELT_SET_CHANNELS_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>2) - goto bad_arg; - st->stream_channels = value; - } - break; - case OPUS_SET_LSB_DEPTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<8 || value>24) - goto bad_arg; - st->lsb_depth=value; - } - break; - case OPUS_GET_LSB_DEPTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - *value=st->lsb_depth; - } - break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->variable_duration = value; - } - break; - case OPUS_RESET_STATE: - { - int i; - opus_val16 *oldBandE, *oldLogE, *oldLogE2; - oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD)); - oldLogE = oldBandE + st->channels*st->mode->nbEBands; - oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; - OPUS_CLEAR((char*)&st->ENCODER_RESET_START, - opus_custom_encoder_get_size(st->mode, st->channels)- - ((char*)&st->ENCODER_RESET_START - (char*)st)); - for (i=0;ichannels*st->mode->nbEBands;i++) - oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); - st->vbr_offset = 0; - st->delayedIntra = 1; - st->spread_decision = SPREAD_NORMAL; - st->tonal_average = 256; - st->hf_average = 0; - st->tapset_decision = 0; - } - break; -#ifdef CUSTOM_MODES - case CELT_SET_INPUT_CLIPPING_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->clip = value; - } - break; -#endif - case CELT_SET_SIGNALLING_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->signalling = value; - } - break; - case CELT_SET_ANALYSIS_REQUEST: - { - AnalysisInfo *info = va_arg(ap, AnalysisInfo *); - if (info) - OPUS_COPY(&st->analysis, info, 1); - } - break; - case CELT_GET_MODE_REQUEST: - { - const CELTMode ** value = va_arg(ap, const CELTMode**); - if (value==0) - goto bad_arg; - *value=st->mode; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 * value = va_arg(ap, opus_uint32 *); - if (value==0) - goto bad_arg; - *value=st->rng; - } - break; - case OPUS_SET_LFE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->lfe = value; - } - break; - case OPUS_SET_ENERGY_MASK_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_mask = value; - } - break; - default: - goto bad_request; - } - va_end(ap); - return OPUS_OK; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -bad_request: - va_end(ap); - return OPUS_UNIMPLEMENTED; -} diff --git a/thirdparty/opus/celt/celt_lpc.c b/thirdparty/opus/celt/celt_lpc.c deleted file mode 100644 index b410a21c5f..0000000000 --- a/thirdparty/opus/celt/celt_lpc.c +++ /dev/null @@ -1,314 +0,0 @@ -/* Copyright (c) 2009-2010 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -void _celt_lpc( - opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ -const opus_val32 *ac, /* in: [0...p] autocorrelation values */ -int p -) -{ - int i, j; - opus_val32 r; - opus_val32 error = ac[0]; -#ifdef FIXED_POINT - opus_val32 lpc[LPC_ORDER]; -#else - float *lpc = _lpc; -#endif - - OPUS_CLEAR(lpc, p); - if (ac[0] != 0) - { - for (i = 0; i < p; i++) { - /* Sum up this iteration's reflection coefficient */ - opus_val32 rr = 0; - for (j = 0; j < i; j++) - rr += MULT32_32_Q31(lpc[j],ac[i - j]); - rr += SHR32(ac[i + 1],3); - r = -frac_div32(SHL32(rr,3), error); - /* Update LPC coefficients and total error */ - lpc[i] = SHR32(r,3); - for (j = 0; j < (i+1)>>1; j++) - { - opus_val32 tmp1, tmp2; - tmp1 = lpc[j]; - tmp2 = lpc[i-1-j]; - lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); - lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); - } - - error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); - /* Bail out once we get 30 dB gain */ -#ifdef FIXED_POINT - if (error=1;j--) - { - mem[j]=mem[j-1]; - } - mem[0] = ROUND16(sum,SIG_SHIFT); - _y[i] = sum; - } -#else - int i,j; - VARDECL(opus_val16, rden); - VARDECL(opus_val16, y); - SAVE_STACK; - - celt_assert((ord&3)==0); - ALLOC(rden, ord, opus_val16); - ALLOC(y, N+ord, opus_val16); - for(i=0;i0); - celt_assert(overlap>=0); - if (overlap == 0) - { - xptr = x; - } else { - for (i=0;i0) - { - for(i=0;i= 536870912) - { - int shift2=1; - if (ac[0] >= 1073741824) - shift2++; - for (i=0;i<=lag;i++) - ac[i] = SHR32(ac[i], shift2); - shift += shift2; - } -#endif - - RESTORE_STACK; - return shift; -} diff --git a/thirdparty/opus/celt/celt_lpc.h b/thirdparty/opus/celt/celt_lpc.h deleted file mode 100644 index 323459eb1a..0000000000 --- a/thirdparty/opus/celt/celt_lpc.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2009-2010 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef PLC_H -#define PLC_H - -#include "arch.h" -#include "cpu_support.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/celt_lpc_sse.h" -#endif - -#define LPC_ORDER 24 - -void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); - -void celt_fir_c( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -#if !defined(OVERRIDE_CELT_FIR) -#define celt_fir(x, num, y, N, ord, mem, arch) \ - (celt_fir_c(x, num, y, N, ord, mem, arch)) -#endif - -void celt_iir(const opus_val32 *x, - const opus_val16 *den, - opus_val32 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, - const opus_val16 *window, int overlap, int lag, int n, int arch); - -#endif /* PLC_H */ diff --git a/thirdparty/opus/celt/cpu_support.h b/thirdparty/opus/celt/cpu_support.h deleted file mode 100644 index 68fc60678f..0000000000 --- a/thirdparty/opus/celt/cpu_support.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CPU_SUPPORT_H -#define CPU_SUPPORT_H - -#include "opus_types.h" -#include "opus_defines.h" - -#if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -#include "arm/armcpu.h" - -/* We currently support 4 ARM variants: - * arch[0] -> ARMv4 - * arch[1] -> ARMv5E - * arch[2] -> ARMv6 - * arch[3] -> NEON - */ -#define OPUS_ARCHMASK 3 - -#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) - -#include "x86/x86cpu.h" -/* We currently support 5 x86 variants: - * arch[0] -> non-sse - * arch[1] -> sse - * arch[2] -> sse2 - * arch[3] -> sse4.1 - * arch[4] -> avx - */ -#define OPUS_ARCHMASK 7 -int opus_select_arch(void); - -#else -#define OPUS_ARCHMASK 0 - -static OPUS_INLINE int opus_select_arch(void) -{ - return 0; -} -#endif -#endif diff --git a/thirdparty/opus/celt/cwrs.c b/thirdparty/opus/celt/cwrs.c deleted file mode 100644 index 9722f0ac86..0000000000 --- a/thirdparty/opus/celt/cwrs.c +++ /dev/null @@ -1,715 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2007-2009 Timothy B. Terriberry - Written by Timothy B. Terriberry and Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "os_support.h" -#include "cwrs.h" -#include "mathops.h" -#include "arch.h" - -#ifdef CUSTOM_MODES - -/*Guaranteed to return a conservatively large estimate of the binary logarithm - with frac bits of fractional precision. - Tested for all possible 32-bit inputs with frac=4, where the maximum - overestimation is 0.06254243 bits.*/ -int log2_frac(opus_uint32 val, int frac) -{ - int l; - l=EC_ILOG(val); - if(val&(val-1)){ - /*This is (val>>l-16), but guaranteed to round up, even if adding a bias - before the shift would cause overflow (e.g., for 0xFFFFxxxx). - Doesn't work for val=0, but that case fails the test above.*/ - if(l>16)val=((val-1)>>(l-16))+1; - else val<<=16-l; - l=(l-1)<>16); - l+=b<>b; - val=(val*val+0x7FFF)>>15; - } - while(frac-->0); - /*If val is not exactly 0x8000, then we have to round up the remainder.*/ - return l+(val>0x8000); - } - /*Exact powers of two require no rounding.*/ - else return (l-1)<0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1, - where choose() is the binomial function. - A table of values for N<10 and K<10 looks like: - V[10][10] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 2, 2, 2, 2, 2, 2, 2, 2, 2}, - {1, 4, 8, 12, 16, 20, 24, 28, 32, 36}, - {1, 6, 18, 38, 66, 102, 146, 198, 258, 326}, - {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992}, - {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290}, - {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436}, - {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598}, - {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688}, - {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146} - }; - - U(N,K) = the number of such combinations wherein N-1 objects are taken at - most K-1 at a time. - This is given by - U(N,K) = sum(k=0...K-1,V(N-1,k)) - = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0. - The latter expression also makes clear that U(N,K) is half the number of such - combinations wherein the first object is taken at least once. - Although it may not be clear from either of these definitions, U(N,K) is the - natural function to work with when enumerating the pulse vector codebooks, - not V(N,K). - U(N,K) is not well-defined for N=0, but with the extension - U(0,K) = K>0 ? 0 : 1, - the function becomes symmetric: U(N,K) = U(K,N), with a similar table: - U[10][10] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 1, 3, 5, 7, 9, 11, 13, 15, 17}, - {0, 1, 5, 13, 25, 41, 61, 85, 113, 145}, - {0, 1, 7, 25, 63, 129, 231, 377, 575, 833}, - {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649}, - {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073}, - {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081}, - {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545}, - {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729} - }; - - With this extension, V(N,K) may be written in terms of U(N,K): - V(N,K) = U(N,K) + U(N,K+1) - for all N>=0, K>=0. - Thus U(N,K+1) represents the number of combinations where the first element - is positive or zero, and U(N,K) represents the number of combinations where - it is negative. - With a large enough table of U(N,K) values, we could write O(N) encoding - and O(min(N*log(K),N+K)) decoding routines, but such a table would be - prohibitively large for small embedded devices (K may be as large as 32767 - for small N, and N may be as large as 200). - - Both functions obey the same recurrence relation: - V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1), - U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1), - for all N>0, K>0, with different initial conditions at N=0 or K=0. - This allows us to construct a row of one of the tables above given the - previous row or the next row. - Thus we can derive O(NK) encoding and decoding routines with O(K) memory - using only addition and subtraction. - - When encoding, we build up from the U(2,K) row and work our way forwards. - When decoding, we need to start at the U(N,K) row and work our way backwards, - which requires a means of computing U(N,K). - U(N,K) may be computed from two previous values with the same N: - U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2) - for all N>1, and since U(N,K) is symmetric, a similar relation holds for two - previous values with the same K: - U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K) - for all K>1. - This allows us to construct an arbitrary row of the U(N,K) table by starting - with the first two values, which are constants. - This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K) - multiplications. - Similar relations can be derived for V(N,K), but are not used here. - - For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree - polynomial for fixed N. - The first few are - U(1,K) = 1, - U(2,K) = 2*K-1, - U(3,K) = (2*K-2)*K+1, - U(4,K) = (((4*K-6)*K+8)*K-3)/3, - U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3, - and - V(1,K) = 2, - V(2,K) = 4*K, - V(3,K) = 4*K*K+2, - V(4,K) = 8*(K*K+2)*K/3, - V(5,K) = ((4*K*K+20)*K*K+6)/3, - for all K>0. - This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for - small N (and indeed decoding is also O(N) for N<3). - - @ARTICLE{Fis86, - author="Thomas R. Fischer", - title="A Pyramid Vector Quantizer", - journal="IEEE Transactions on Information Theory", - volume="IT-32", - number=4, - pages="568--583", - month=Jul, - year=1986 - }*/ - -#if !defined(SMALL_FOOTPRINT) - -/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/ -# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)]) -/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N - with K pulses allocated to it.*/ -# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1)) - -/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)). - Thus, the number of entries in row I is the larger of the maximum number of - pulses we will ever allocate for a given N=I (K=128, or however many fit in - 32 bits, whichever is smaller), plus one, and the maximum N for which - K=I-1 pulses fit in 32 bits. - The largest band size in an Opus Custom mode is 208. - Otherwise, we can limit things to the set of N which can be achieved by - splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48, - 44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/ -#if defined(CUSTOM_MODES) -static const opus_uint32 CELT_PVQ_U_DATA[1488]={ -#else -static const opus_uint32 CELT_PVQ_U_DATA[1272]={ -#endif - /*N=0, K=0...176:*/ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -#if defined(CUSTOM_MODES) - /*...208:*/ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -#endif - /*N=1, K=1...176:*/ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -#if defined(CUSTOM_MODES) - /*...208:*/ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, -#endif - /*N=2, K=2...176:*/ - 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, - 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, - 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, - 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143, - 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, - 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, - 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, - 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, - 265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293, - 295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323, - 325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351, -#if defined(CUSTOM_MODES) - /*...208:*/ - 353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381, - 383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411, - 413, 415, -#endif - /*N=3, K=3...176:*/ - 13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613, - 685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861, - 1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785, - 3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385, - 6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661, - 9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961, - 13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745, - 17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013, - 21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765, - 26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001, - 31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721, - 37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925, - 43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613, - 50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785, - 57461, 58141, 58825, 59513, 60205, 60901, 61601, -#if defined(CUSTOM_MODES) - /*...208:*/ - 62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565, - 70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013, - 78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113, -#endif - /*N=4, K=4...176:*/ - 63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017, - 7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775, - 30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153, - 82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193, - 161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575, - 267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217, - 410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951, - 597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609, - 833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023, - 1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407, - 1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759, - 1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175, - 2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751, - 2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583, - 3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767, - 3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399, - 4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575, - 5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391, - 6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943, - 7085049, 7207551, -#if defined(CUSTOM_MODES) - /*...208:*/ - 7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783, - 8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967, - 9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199, - 10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177, - 11912575, -#endif - /*N=5, K=5...176:*/ - 321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041, - 50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401, - 330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241, - 1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241, - 2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801, - 4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849, - 8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849, - 13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809, - 20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881, - 29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641, - 40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081, - 55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609, - 73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049, - 95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641, - 122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041, - 155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321, - 193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969, - 238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889, - 290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401, - 351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241, - 420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561, - 500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929, - 590359041, 604167209, 618216201, 632508801, -#if defined(CUSTOM_MODES) - /*...208:*/ - 647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241, - 755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161, - 878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329, - 1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041, - 1143412929, 1166053121, 1189027881, 1212340489, 1235994241, -#endif - /*N=6, K=6...96:*/ - 1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047, - 335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409, - 2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793, - 11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455, - 29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189, - 64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651, - 128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185, - 235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647, - 402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229, - 655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283, - 1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135, - 1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187, - 2011371957, 2120032959, -#if defined(CUSTOM_MODES) - /*...109:*/ - 2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U, - 3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U, - 4012305913U, -#endif - /*N=7, K=7...54*/ - 8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777, - 1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233, - 19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013, - 88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805, - 292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433, - 793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821, - 1667010073, 1870535785, 2094367717, -#if defined(CUSTOM_MODES) - /*...60:*/ - 2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U, -#endif - /*N=8, K=8...37*/ - 48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767, - 9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017, - 104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351, - 638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615, - 2229491905U, -#if defined(CUSTOM_MODES) - /*...40:*/ - 2691463695U, 3233240945U, 3866006015U, -#endif - /*N=9, K=9...28:*/ - 265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777, - 39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145, - 628496897, 872893441, 1196924561, 1621925137, 2173806145U, -#if defined(CUSTOM_MODES) - /*...29:*/ - 2883810113U, -#endif - /*N=10, K=10...24:*/ - 1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073, - 254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U, - 3375210671U, - /*N=11, K=11...19:*/ - 8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585, - 948062325, 1616336765, -#if defined(CUSTOM_MODES) - /*...20:*/ - 2684641785U, -#endif - /*N=12, K=12...18:*/ - 45046719, 103274625, 224298231, 464387817, 921406335, 1759885185, - 3248227095U, - /*N=13, K=13...16:*/ - 251595969, 579168825, 1267854873, 2653649025U, - /*N=14, K=14:*/ - 1409933619 -}; - -#if defined(CUSTOM_MODES) -static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ - CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415, - CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030, - CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389, - CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455, - CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473 -}; -#else -static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ - CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351, - CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870, - CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178, - CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240, - CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257 -}; -#endif - -#if defined(CUSTOM_MODES) -void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ - int k; - /*_maxk==0 => there's nothing to do.*/ - celt_assert(_maxk>0); - _bits[0]=0; - for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac); -} -#endif - -static opus_uint32 icwrs(int _n,const int *_y){ - opus_uint32 i; - int j; - int k; - celt_assert(_n>=2); - j=_n-1; - i=_y[j]<0; - k=abs(_y[j]); - do{ - j--; - i+=CELT_PVQ_U(_n-j,k); - k+=abs(_y[j]); - if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1); - } - while(j>0); - return i; -} - -void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ - celt_assert(_k>0); - ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); -} - -static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ - opus_uint32 p; - int s; - int k0; - opus_int16 val; - opus_val32 yy=0; - celt_assert(_k>0); - celt_assert(_n>1); - while(_n>2){ - opus_uint32 q; - /*Lots of pulses case:*/ - if(_k>=_n){ - const opus_uint32 *row; - row=CELT_PVQ_U_ROW[_n]; - /*Are the pulses in this dimension negative?*/ - p=row[_k+1]; - s=-(_i>=p); - _i-=p&s; - /*Count how many pulses were placed in this dimension.*/ - k0=_k; - q=row[_n]; - if(q>_i){ - celt_assert(p>q); - _k=_n; - do p=CELT_PVQ_U_ROW[--_k][_n]; - while(p>_i); - } - else for(p=row[_k];p>_i;p=row[_k])_k--; - _i-=p; - val=(k0-_k+s)^s; - *_y++=val; - yy=MAC16_16(yy,val,val); - } - /*Lots of dimensions case:*/ - else{ - /*Are there any pulses in this dimension at all?*/ - p=CELT_PVQ_U_ROW[_k][_n]; - q=CELT_PVQ_U_ROW[_k+1][_n]; - if(p<=_i&&_i=q); - _i-=q&s; - /*Count how many pulses were placed in this dimension.*/ - k0=_k; - do p=CELT_PVQ_U_ROW[--_k][_n]; - while(p>_i); - _i-=p; - val=(k0-_k+s)^s; - *_y++=val; - yy=MAC16_16(yy,val,val); - } - } - _n--; - } - /*_n==2*/ - p=2*_k+1; - s=-(_i>=p); - _i-=p&s; - k0=_k; - _k=(_i+1)>>1; - if(_k)_i-=2*_k-1; - val=(k0-_k+s)^s; - *_y++=val; - yy=MAC16_16(yy,val,val); - /*_n==1*/ - s=-(int)_i; - val=(_k+s)^s; - *_y=val; - yy=MAC16_16(yy,val,val); - return yy; -} - -opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); -} - -#else /* SMALL_FOOTPRINT */ - -/*Computes the next row/column of any recurrence that obeys the relation - u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. - _ui0 is the base case for the new row/column.*/ -static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ - opus_uint32 ui1; - unsigned j; - /*This do-while will overrun the array if we don't have storage for at least - 2 values.*/ - j=1; do { - ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0); - _ui[j-1]=_ui0; - _ui0=ui1; - } while (++j<_len); - _ui[j-1]=_ui0; -} - -/*Computes the previous row/column of any recurrence that obeys the relation - u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1]. - _ui0 is the base case for the new row/column.*/ -static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ - opus_uint32 ui1; - unsigned j; - /*This do-while will overrun the array if we don't have storage for at least - 2 values.*/ - j=1; do { - ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0); - _ui[j-1]=_ui0; - _ui0=ui1; - } while (++j<_n); - _ui[j-1]=_ui0; -} - -/*Compute V(_n,_k), as well as U(_n,0..._k+1). - _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/ -static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ - opus_uint32 um2; - unsigned len; - unsigned k; - len=_k+2; - /*We require storage at least 3 values (e.g., _k>0).*/ - celt_assert(len>=3); - _u[0]=0; - _u[1]=um2=1; - /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ - /*If _n==1, _u[i] should be 1 for i>1.*/ - celt_assert(_n>=2); - /*If _k==0, the following do-while loop will overflow the buffer.*/ - celt_assert(_k>0); - k=2; - do _u[k]=(k<<1)-1; - while(++k0); - j=0; - do{ - opus_uint32 p; - int s; - int yj; - p=_u[_k+1]; - s=-(_i>=p); - _i-=p&s; - yj=_k; - p=_u[_k]; - while(p>_i)p=_u[--_k]; - _i-=p; - yj-=_k; - val=(yj+s)^s; - _y[j]=val; - yy=MAC16_16(yy,val,val); - uprev(_u,_k+2,0); - } - while(++j<_n); - return yy; -} - -/*Returns the index of the given combination of K elements chosen from a set - of size 1 with associated sign bits. - _y: The vector of pulses, whose sum of absolute values is K. - _k: Returns K.*/ -static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){ - *_k=abs(_y[0]); - return _y[0]<0; -} - -/*Returns the index of the given combination of K elements chosen from a set - of size _n with associated sign bits. - _y: The vector of pulses, whose sum of absolute values must be _k. - _nc: Returns V(_n,_k).*/ -static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, - opus_uint32 *_u){ - opus_uint32 i; - int j; - int k; - /*We can't unroll the first two iterations of the loop unless _n>=2.*/ - celt_assert(_n>=2); - _u[0]=0; - for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1; - i=icwrs1(_y+_n-1,&k); - j=_n-2; - i+=_u[k]; - k+=abs(_y[j]); - if(_y[j]<0)i+=_u[k+1]; - while(j-->0){ - unext(_u,_k+2,0); - i+=_u[k]; - k+=abs(_y[j]); - if(_y[j]<0)i+=_u[k+1]; - } - *_nc=_u[k]+_u[k+1]; - return i; -} - -#ifdef CUSTOM_MODES -void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ - int k; - /*_maxk==0 => there's nothing to do.*/ - celt_assert(_maxk>0); - _bits[0]=0; - if (_n==1) - { - for (k=1;k<=_maxk;k++) - _bits[k] = 1<<_frac; - } - else { - VARDECL(opus_uint32,u); - SAVE_STACK; - ALLOC(u,_maxk+2U,opus_uint32); - ncwrs_urow(_n,_maxk,u); - for(k=1;k<=_maxk;k++) - _bits[k]=log2_frac(u[k]+u[k+1],_frac); - RESTORE_STACK; - } -} -#endif /* CUSTOM_MODES */ - -void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ - opus_uint32 i; - VARDECL(opus_uint32,u); - opus_uint32 nc; - SAVE_STACK; - celt_assert(_k>0); - ALLOC(u,_k+2U,opus_uint32); - i=icwrs(_n,_k,&nc,_y,u); - ec_enc_uint(_enc,i,nc); - RESTORE_STACK; -} - -opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - VARDECL(opus_uint32,u); - int ret; - SAVE_STACK; - celt_assert(_k>0); - ALLOC(u,_k+2U,opus_uint32); - ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); - RESTORE_STACK; - return ret; -} - -#endif /* SMALL_FOOTPRINT */ diff --git a/thirdparty/opus/celt/cwrs.h b/thirdparty/opus/celt/cwrs.h deleted file mode 100644 index 7cd4717459..0000000000 --- a/thirdparty/opus/celt/cwrs.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2007-2009 Timothy B. Terriberry - Written by Timothy B. Terriberry and Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CWRS_H -#define CWRS_H - -#include "arch.h" -#include "stack_alloc.h" -#include "entenc.h" -#include "entdec.h" - -#ifdef CUSTOM_MODES -int log2_frac(opus_uint32 val, int frac); -#endif - -void get_required_bits(opus_int16 *bits, int N, int K, int frac); - -void encode_pulses(const int *_y, int N, int K, ec_enc *enc); - -opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); - -#endif /* CWRS_H */ diff --git a/thirdparty/opus/celt/ecintrin.h b/thirdparty/opus/celt/ecintrin.h deleted file mode 100644 index 2263cff6bd..0000000000 --- a/thirdparty/opus/celt/ecintrin.h +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2003-2008 Timothy B. Terriberry - Copyright (c) 2008 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*Some common macros for potential platform-specific optimization.*/ -#include "opus_types.h" -#include -#include -#include "arch.h" -#if !defined(_ecintrin_H) -# define _ecintrin_H (1) - -/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly - versions of these functions which can substantially improve performance. - We define macros for them to allow easy incorporation of these non-ANSI - features.*/ - -/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if - given an appropriate architecture, but the branchless bit-twiddling versions - are just as fast, and do not require any special target architecture. - Earlier gcc versions (3.x) compiled both code to the same assembly - instructions, because of the way they represented ((_b)>(_a)) internally.*/ -# define EC_MINI(_a,_b) ((_a)+(((_b)-(_a))&-((_b)<(_a)))) - -/*Count leading zeros. - This macro should only be used for implementing ec_ilog(), if it is defined. - All other code should use EC_ILOG() instead.*/ -#if defined(_MSC_VER) && (_MSC_VER >= 1400) -# include -/*In _DEBUG mode this is not an intrinsic by default.*/ -# pragma intrinsic(_BitScanReverse) - -static __inline int ec_bsr(unsigned long _x){ - unsigned long ret; - _BitScanReverse(&ret,_x); - return (int)ret; -} -# define EC_CLZ0 (1) -# define EC_CLZ(_x) (-ec_bsr(_x)) -#elif defined(ENABLE_TI_DSPLIB) -# include "dsplib.h" -# define EC_CLZ0 (31) -# define EC_CLZ(_x) (_lnorm(_x)) -#elif __GNUC_PREREQ(3,4) -# if INT_MAX>=2147483647 -# define EC_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT) -# define EC_CLZ(_x) (__builtin_clz(_x)) -# elif LONG_MAX>=2147483647L -# define EC_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT) -# define EC_CLZ(_x) (__builtin_clzl(_x)) -# endif -#endif - -#if defined(EC_CLZ) -/*Note that __builtin_clz is not defined when _x==0, according to the gcc - documentation (and that of the BSR instruction that implements it on x86). - The majority of the time we can never pass it zero. - When we need to, it can be special cased.*/ -# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x)) -#else -int ec_ilog(opus_uint32 _v); -# define EC_ILOG(_x) (ec_ilog(_x)) -#endif -#endif diff --git a/thirdparty/opus/celt/entcode.c b/thirdparty/opus/celt/entcode.c deleted file mode 100644 index 70f32016ec..0000000000 --- a/thirdparty/opus/celt/entcode.c +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "entcode.h" -#include "arch.h" - -#if !defined(EC_CLZ) -/*This is a fallback for systems where we don't know how to access - a BSR or CLZ instruction (see ecintrin.h). - If you are optimizing Opus on a new platform and it has a native CLZ or - BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be - an easy performance win.*/ -int ec_ilog(opus_uint32 _v){ - /*On a Pentium M, this branchless version tested as the fastest on - 1,000,000,000 random 32-bit integers, edging out a similar version with - branches, and a 256-entry LUT version.*/ - int ret; - int m; - ret=!!_v; - m=!!(_v&0xFFFF0000)<<4; - _v>>=m; - ret|=m; - m=!!(_v&0xFF00)<<3; - _v>>=m; - ret|=m; - m=!!(_v&0xF0)<<2; - _v>>=m; - ret|=m; - m=!!(_v&0xC)<<1; - _v>>=m; - ret|=m; - ret+=!!(_v&0x2); - return ret; -} -#endif - -#if 1 -/* This is a faster version of ec_tell_frac() that takes advantage - of the low (1/8 bit) resolution to use just a linear function - followed by a lookup to determine the exact transition thresholds. */ -opus_uint32 ec_tell_frac(ec_ctx *_this){ - static const unsigned correction[8] = - {35733, 38967, 42495, 46340, - 50535, 55109, 60097, 65535}; - opus_uint32 nbits; - opus_uint32 r; - int l; - unsigned b; - nbits=_this->nbits_total<rng); - r=_this->rng>>(l-16); - b = (r>>12)-8; - b += r>correction[b]; - l = (l<<3)+b; - return nbits-l; -} -#else -opus_uint32 ec_tell_frac(ec_ctx *_this){ - opus_uint32 nbits; - opus_uint32 r; - int l; - int i; - /*To handle the non-integral number of bits still left in the encoder/decoder - state, we compute the worst-case number of bits of val that must be - encoded to ensure that the value is inside the range for any possible - subsequent bits. - The computation here is independent of val itself (the decoder does not - even track that value), even though the real number of bits used after - ec_enc_done() may be 1 smaller if rng is a power of two and the - corresponding trailing bits of val are all zeros. - If we did try to track that special case, then coding a value with a - probability of 1/(1<nbits_total<rng); - r=_this->rng>>(l-16); - for(i=BITRES;i-->0;){ - int b; - r=r*r>>15; - b=(int)(r>>16); - l=l<<1|b; - r>>=b; - } - return nbits-l; -} -#endif - -#ifdef USE_SMALL_DIV_TABLE -/* Result of 2^32/(2*i+1), except for i=0. */ -const opus_uint32 SMALL_DIV_TABLE[129] = { - 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, - 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, - 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, - 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, - 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, - 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, - 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, - 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, - 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, - 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, - 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, - 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, - 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, - 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, - 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, - 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, - 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, - 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, - 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, - 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, - 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, - 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, - 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, - 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, - 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, - 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, - 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, - 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, - 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, - 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, - 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, - 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 -}; -#endif diff --git a/thirdparty/opus/celt/entcode.h b/thirdparty/opus/celt/entcode.h deleted file mode 100644 index 13d6c84ef0..0000000000 --- a/thirdparty/opus/celt/entcode.h +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "opus_types.h" -#include "opus_defines.h" - -#if !defined(_entcode_H) -# define _entcode_H (1) -# include -# include -# include "ecintrin.h" - -extern const opus_uint32 SMALL_DIV_TABLE[129]; - -#ifdef OPUS_ARM_ASM -#define USE_SMALL_DIV_TABLE -#endif - -/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a - larger type, you can speed up the decoder by using it here.*/ -typedef opus_uint32 ec_window; -typedef struct ec_ctx ec_ctx; -typedef struct ec_ctx ec_enc; -typedef struct ec_ctx ec_dec; - -# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT) - -/*The number of bits to use for the range-coded part of unsigned integers.*/ -# define EC_UINT_BITS (8) - -/*The resolution of fractional-precision bit usage measurements, i.e., - 3 => 1/8th bits.*/ -# define BITRES 3 - -/*The entropy encoder/decoder context. - We use the same structure for both, so that common functions like ec_tell() - can be used on either one.*/ -struct ec_ctx{ - /*Buffered input/output.*/ - unsigned char *buf; - /*The size of the buffer.*/ - opus_uint32 storage; - /*The offset at which the last byte containing raw bits was read/written.*/ - opus_uint32 end_offs; - /*Bits that will be read from/written at the end.*/ - ec_window end_window; - /*Number of valid bits in end_window.*/ - int nend_bits; - /*The total number of whole bits read/written. - This does not include partial bits currently in the range coder.*/ - int nbits_total; - /*The offset at which the next range coder byte will be read/written.*/ - opus_uint32 offs; - /*The number of values in the current range.*/ - opus_uint32 rng; - /*In the decoder: the difference between the top of the current range and - the input value, minus one. - In the encoder: the low end of the current range.*/ - opus_uint32 val; - /*In the decoder: the saved normalization factor from ec_decode(). - In the encoder: the number of oustanding carry propagating symbols.*/ - opus_uint32 ext; - /*A buffered input/output symbol, awaiting carry propagation.*/ - int rem; - /*Nonzero if an error occurred.*/ - int error; -}; - -static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){ - return _this->offs; -} - -static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){ - return _this->buf; -} - -static OPUS_INLINE int ec_get_error(ec_ctx *_this){ - return _this->error; -} - -/*Returns the number of bits "used" by the encoded or decoded symbols so far. - This same number can be computed in either the encoder or the decoder, and is - suitable for making coding decisions. - Return: The number of bits. - This will always be slightly larger than the exact value (e.g., all - rounding error is in the positive direction).*/ -static OPUS_INLINE int ec_tell(ec_ctx *_this){ - return _this->nbits_total-EC_ILOG(_this->rng); -} - -/*Returns the number of bits "used" by the encoded or decoded symbols so far. - This same number can be computed in either the encoder or the decoder, and is - suitable for making coding decisions. - Return: The number of bits scaled by 2**BITRES. - This will always be slightly larger than the exact value (e.g., all - rounding error is in the positive direction).*/ -opus_uint32 ec_tell_frac(ec_ctx *_this); - -/* Tested exhaustively for all n and for 1<=d<=256 */ -static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { - celt_assert(d>0); -#ifdef USE_SMALL_DIV_TABLE - if (d>256) - return n/d; - else { - opus_uint32 t, q; - t = EC_ILOG(d&-d); - q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; - return q+(n-q*d >= d); - } -#else - return n/d; -#endif -} - -static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { - celt_assert(d>0); -#ifdef USE_SMALL_DIV_TABLE - if (n<0) - return -(opus_int32)celt_udiv(-n, d); - else - return celt_udiv(n, d); -#else - return n/d; -#endif -} - -#endif diff --git a/thirdparty/opus/celt/entdec.c b/thirdparty/opus/celt/entdec.c deleted file mode 100644 index 0b3433ed8b..0000000000 --- a/thirdparty/opus/celt/entdec.c +++ /dev/null @@ -1,245 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "os_support.h" -#include "arch.h" -#include "entdec.h" -#include "mfrngcod.h" - -/*A range decoder. - This is an entropy decoder based upon \cite{Mar79}, which is itself a - rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. - It is very similar to arithmetic encoding, except that encoding is done with - digits in any base, instead of with bits, and so it is faster when using - larger bases (i.e.: a byte). - The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ - is the base, longer than the theoretical optimum, but to my knowledge there - is no published justification for this claim. - This only seems true when using near-infinite precision arithmetic so that - the process is carried out with no rounding errors. - - An excellent description of implementation details is available at - http://www.arturocampos.com/ac_range.html - A recent work \cite{MNW98} which proposes several changes to arithmetic - encoding for efficiency actually re-discovers many of the principles - behind range encoding, and presents a good theoretical analysis of them. - - End of stream is handled by writing out the smallest number of bits that - ensures that the stream will be correctly decoded regardless of the value of - any subsequent bits. - ec_tell() can be used to determine how many bits were needed to decode - all the symbols thus far; other data can be packed in the remaining bits of - the input buffer. - @PHDTHESIS{Pas76, - author="Richard Clark Pasco", - title="Source coding algorithms for fast data compression", - school="Dept. of Electrical Engineering, Stanford University", - address="Stanford, CA", - month=May, - year=1976 - } - @INPROCEEDINGS{Mar79, - author="Martin, G.N.N.", - title="Range encoding: an algorithm for removing redundancy from a digitised - message", - booktitle="Video & Data Recording Conference", - year=1979, - address="Southampton", - month=Jul - } - @ARTICLE{MNW98, - author="Alistair Moffat and Radford Neal and Ian H. Witten", - title="Arithmetic Coding Revisited", - journal="{ACM} Transactions on Information Systems", - year=1998, - volume=16, - number=3, - pages="256--294", - month=Jul, - URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf" - }*/ - -static int ec_read_byte(ec_dec *_this){ - return _this->offs<_this->storage?_this->buf[_this->offs++]:0; -} - -static int ec_read_byte_from_end(ec_dec *_this){ - return _this->end_offs<_this->storage? - _this->buf[_this->storage-++(_this->end_offs)]:0; -} - -/*Normalizes the contents of val and rng so that rng lies entirely in the - high-order symbol.*/ -static void ec_dec_normalize(ec_dec *_this){ - /*If the range is too small, rescale it and input some bits.*/ - while(_this->rng<=EC_CODE_BOT){ - int sym; - _this->nbits_total+=EC_SYM_BITS; - _this->rng<<=EC_SYM_BITS; - /*Use up the remaining bits from our last symbol.*/ - sym=_this->rem; - /*Read the next value from the input.*/ - _this->rem=ec_read_byte(_this); - /*Take the rest of the bits we need from this new symbol.*/ - sym=(sym<rem)>>(EC_SYM_BITS-EC_CODE_EXTRA); - /*And subtract them from val, capped to be less than EC_CODE_TOP.*/ - _this->val=((_this->val<buf=_buf; - _this->storage=_storage; - _this->end_offs=0; - _this->end_window=0; - _this->nend_bits=0; - /*This is the offset from which ec_tell() will subtract partial bits. - The final value after the ec_dec_normalize() call will be the same as in - the encoder, but we have to compensate for the bits that are added there.*/ - _this->nbits_total=EC_CODE_BITS+1 - -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS; - _this->offs=0; - _this->rng=1U<rem=ec_read_byte(_this); - _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA)); - _this->error=0; - /*Normalize the interval.*/ - ec_dec_normalize(_this); -} - -unsigned ec_decode(ec_dec *_this,unsigned _ft){ - unsigned s; - _this->ext=celt_udiv(_this->rng,_ft); - s=(unsigned)(_this->val/_this->ext); - return _ft-EC_MINI(s+1,_ft); -} - -unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ - unsigned s; - _this->ext=_this->rng>>_bits; - s=(unsigned)(_this->val/_this->ext); - return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits); -} - -void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ - opus_uint32 s; - s=IMUL32(_this->ext,_ft-_fh); - _this->val-=s; - _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s; - ec_dec_normalize(_this); -} - -/*The probability of having a "one" is 1/(1<<_logp).*/ -int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){ - opus_uint32 r; - opus_uint32 d; - opus_uint32 s; - int ret; - r=_this->rng; - d=_this->val; - s=r>>_logp; - ret=dval=d-s; - _this->rng=ret?s:r-s; - ec_dec_normalize(_this); - return ret; -} - -int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){ - opus_uint32 r; - opus_uint32 d; - opus_uint32 s; - opus_uint32 t; - int ret; - s=_this->rng; - d=_this->val; - r=s>>_ftb; - ret=-1; - do{ - t=s; - s=IMUL32(r,_icdf[++ret]); - } - while(dval=d-s; - _this->rng=t-s; - ec_dec_normalize(_this); - return ret; -} - -opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){ - unsigned ft; - unsigned s; - int ftb; - /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ - celt_assert(_ft>1); - _ft--; - ftb=EC_ILOG(_ft); - if(ftb>EC_UINT_BITS){ - opus_uint32 t; - ftb-=EC_UINT_BITS; - ft=(unsigned)(_ft>>ftb)+1; - s=ec_decode(_this,ft); - ec_dec_update(_this,s,s+1,ft); - t=(opus_uint32)s<error=1; - return _ft; - } - else{ - _ft++; - s=ec_decode(_this,(unsigned)_ft); - ec_dec_update(_this,s,s+1,(unsigned)_ft); - return s; - } -} - -opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){ - ec_window window; - int available; - opus_uint32 ret; - window=_this->end_window; - available=_this->nend_bits; - if((unsigned)available<_bits){ - do{ - window|=(ec_window)ec_read_byte_from_end(_this)<>=_bits; - available-=_bits; - _this->end_window=window; - _this->nend_bits=available; - _this->nbits_total+=_bits; - return ret; -} diff --git a/thirdparty/opus/celt/entdec.h b/thirdparty/opus/celt/entdec.h deleted file mode 100644 index d8ab318730..0000000000 --- a/thirdparty/opus/celt/entdec.h +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(_entdec_H) -# define _entdec_H (1) -# include -# include "entcode.h" - -/*Initializes the decoder. - _buf: The input buffer to use. - Return: 0 on success, or a negative value on error.*/ -void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage); - -/*Calculates the cumulative frequency for the next symbol. - This can then be fed into the probability model to determine what that - symbol is, and the additional frequency information required to advance to - the next symbol. - This function cannot be called more than once without a corresponding call to - ec_dec_update(), or decoding will not proceed correctly. - _ft: The total frequency of the symbols in the alphabet the next symbol was - encoded with. - Return: A cumulative frequency representing the encoded symbol. - If the cumulative frequency of all the symbols before the one that - was encoded was fl, and the cumulative frequency of all the symbols - up to and including the one encoded is fh, then the returned value - will fall in the range [fl,fh).*/ -unsigned ec_decode(ec_dec *_this,unsigned _ft); - -/*Equivalent to ec_decode() with _ft==1<<_bits.*/ -unsigned ec_decode_bin(ec_dec *_this,unsigned _bits); - -/*Advance the decoder past the next symbol using the frequency information the - symbol was encoded with. - Exactly one call to ec_decode() must have been made so that all necessary - intermediate calculations are performed. - _fl: The cumulative frequency of all symbols that come before the symbol - decoded. - _fh: The cumulative frequency of all symbols up to and including the symbol - decoded. - Together with _fl, this defines the range [_fl,_fh) in which the value - returned above must fall. - _ft: The total frequency of the symbols in the alphabet the symbol decoded - was encoded in. - This must be the same as passed to the preceding call to ec_decode().*/ -void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft); - -/* Decode a bit that has a 1/(1<<_logp) probability of being a one */ -int ec_dec_bit_logp(ec_dec *_this,unsigned _logp); - -/*Decodes a symbol given an "inverse" CDF table. - No call to ec_dec_update() is necessary after this call. - _icdf: The "inverse" CDF, such that symbol s falls in the range - [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb. - The values must be monotonically non-increasing, and the last value - must be 0. - _ftb: The number of bits of precision in the cumulative distribution. - Return: The decoded symbol s.*/ -int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb); - -/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream. - The bits must have been encoded with ec_enc_uint(). - No call to ec_dec_update() is necessary after this call. - _ft: The number of integers that can be decoded (one more than the max). - This must be at least one, and no more than 2**32-1. - Return: The decoded bits.*/ -opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft); - -/*Extracts a sequence of raw bits from the stream. - The bits must have been encoded with ec_enc_bits(). - No call to ec_dec_update() is necessary after this call. - _ftb: The number of bits to extract. - This must be between 0 and 25, inclusive. - Return: The decoded bits.*/ -opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb); - -#endif diff --git a/thirdparty/opus/celt/entenc.c b/thirdparty/opus/celt/entenc.c deleted file mode 100644 index f1750d25b8..0000000000 --- a/thirdparty/opus/celt/entenc.c +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if defined(HAVE_CONFIG_H) -# include "config.h" -#endif -#include "os_support.h" -#include "arch.h" -#include "entenc.h" -#include "mfrngcod.h" - -/*A range encoder. - See entdec.c and the references for implementation details \cite{Mar79,MNW98}. - - @INPROCEEDINGS{Mar79, - author="Martin, G.N.N.", - title="Range encoding: an algorithm for removing redundancy from a digitised - message", - booktitle="Video \& Data Recording Conference", - year=1979, - address="Southampton", - month=Jul - } - @ARTICLE{MNW98, - author="Alistair Moffat and Radford Neal and Ian H. Witten", - title="Arithmetic Coding Revisited", - journal="{ACM} Transactions on Information Systems", - year=1998, - volume=16, - number=3, - pages="256--294", - month=Jul, - URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" - }*/ - -static int ec_write_byte(ec_enc *_this,unsigned _value){ - if(_this->offs+_this->end_offs>=_this->storage)return -1; - _this->buf[_this->offs++]=(unsigned char)_value; - return 0; -} - -static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){ - if(_this->offs+_this->end_offs>=_this->storage)return -1; - _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value; - return 0; -} - -/*Outputs a symbol, with a carry bit. - If there is a potential to propagate a carry over several symbols, they are - buffered until it can be determined whether or not an actual carry will - occur. - If the counter for the buffered symbols overflows, then the stream becomes - undecodable. - This gives a theoretical limit of a few billion symbols in a single packet on - 32-bit systems. - The alternative is to truncate the range in order to force a carry, but - requires similar carry tracking in the decoder, needlessly slowing it down.*/ -static void ec_enc_carry_out(ec_enc *_this,int _c){ - if(_c!=EC_SYM_MAX){ - /*No further carry propagation possible, flush buffer.*/ - int carry; - carry=_c>>EC_SYM_BITS; - /*Don't output a byte on the first write. - This compare should be taken care of by branch-prediction thereafter.*/ - if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry); - if(_this->ext>0){ - unsigned sym; - sym=(EC_SYM_MAX+carry)&EC_SYM_MAX; - do _this->error|=ec_write_byte(_this,sym); - while(--(_this->ext)>0); - } - _this->rem=_c&EC_SYM_MAX; - } - else _this->ext++; -} - -static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){ - /*If the range is too small, output some bits and rescale it.*/ - while(_this->rng<=EC_CODE_BOT){ - ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); - /*Move the next-to-high-order symbol into the high-order position.*/ - _this->val=(_this->val<rng<<=EC_SYM_BITS; - _this->nbits_total+=EC_SYM_BITS; - } -} - -void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ - _this->buf=_buf; - _this->end_offs=0; - _this->end_window=0; - _this->nend_bits=0; - /*This is the offset from which ec_tell() will subtract partial bits.*/ - _this->nbits_total=EC_CODE_BITS+1; - _this->offs=0; - _this->rng=EC_CODE_TOP; - _this->rem=-1; - _this->val=0; - _this->ext=0; - _this->storage=_size; - _this->error=0; -} - -void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ - opus_uint32 r; - r=celt_udiv(_this->rng,_ft); - if(_fl>0){ - _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); - _this->rng=IMUL32(r,(_fh-_fl)); - } - else _this->rng-=IMUL32(r,(_ft-_fh)); - ec_enc_normalize(_this); -} - -void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){ - opus_uint32 r; - r=_this->rng>>_bits; - if(_fl>0){ - _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl)); - _this->rng=IMUL32(r,(_fh-_fl)); - } - else _this->rng-=IMUL32(r,((1U<<_bits)-_fh)); - ec_enc_normalize(_this); -} - -/*The probability of having a "one" is 1/(1<<_logp).*/ -void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){ - opus_uint32 r; - opus_uint32 s; - opus_uint32 l; - r=_this->rng; - l=_this->val; - s=r>>_logp; - r-=s; - if(_val)_this->val=l+r; - _this->rng=_val?s:r; - ec_enc_normalize(_this); -} - -void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){ - opus_uint32 r; - r=_this->rng>>_ftb; - if(_s>0){ - _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]); - _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]); - } - else _this->rng-=IMUL32(r,_icdf[_s]); - ec_enc_normalize(_this); -} - -void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){ - unsigned ft; - unsigned fl; - int ftb; - /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ - celt_assert(_ft>1); - _ft--; - ftb=EC_ILOG(_ft); - if(ftb>EC_UINT_BITS){ - ftb-=EC_UINT_BITS; - ft=(_ft>>ftb)+1; - fl=(unsigned)(_fl>>ftb); - ec_encode(_this,fl,fl+1,ft); - ec_enc_bits(_this,_fl&(((opus_uint32)1<end_window; - used=_this->nend_bits; - celt_assert(_bits>0); - if(used+_bits>EC_WINDOW_SIZE){ - do{ - _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); - window>>=EC_SYM_BITS; - used-=EC_SYM_BITS; - } - while(used>=EC_SYM_BITS); - } - window|=(ec_window)_fl<end_window=window; - _this->nend_bits=used; - _this->nbits_total+=_bits; -} - -void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){ - int shift; - unsigned mask; - celt_assert(_nbits<=EC_SYM_BITS); - shift=EC_SYM_BITS-_nbits; - mask=((1<<_nbits)-1)<offs>0){ - /*The first byte has been finalized.*/ - _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<rem>=0){ - /*The first byte is still awaiting carry propagation.*/ - _this->rem=(_this->rem&~mask)|_val<rng<=(EC_CODE_TOP>>_nbits)){ - /*The renormalization loop has never been run.*/ - _this->val=(_this->val&~((opus_uint32)mask<error=-1; -} - -void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){ - celt_assert(_this->offs+_this->end_offs<=_size); - OPUS_MOVE(_this->buf+_size-_this->end_offs, - _this->buf+_this->storage-_this->end_offs,_this->end_offs); - _this->storage=_size; -} - -void ec_enc_done(ec_enc *_this){ - ec_window window; - int used; - opus_uint32 msk; - opus_uint32 end; - int l; - /*We output the minimum number of bits that ensures that the symbols encoded - thus far will be decoded correctly regardless of the bits that follow.*/ - l=EC_CODE_BITS-EC_ILOG(_this->rng); - msk=(EC_CODE_TOP-1)>>l; - end=(_this->val+msk)&~msk; - if((end|msk)>=_this->val+_this->rng){ - l++; - msk>>=1; - end=(_this->val+msk)&~msk; - } - while(l>0){ - ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT)); - end=(end<rem>=0||_this->ext>0)ec_enc_carry_out(_this,0); - /*If we have buffered extra bits, flush them as well.*/ - window=_this->end_window; - used=_this->nend_bits; - while(used>=EC_SYM_BITS){ - _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); - window>>=EC_SYM_BITS; - used-=EC_SYM_BITS; - } - /*Clear any excess space and add any remaining extra bits to the last byte.*/ - if(!_this->error){ - OPUS_CLEAR(_this->buf+_this->offs, - _this->storage-_this->offs-_this->end_offs); - if(used>0){ - /*If there's no range coder data at all, give up.*/ - if(_this->end_offs>=_this->storage)_this->error=-1; - else{ - l=-l; - /*If we've busted, don't add too many extra bits to the last byte; it - would corrupt the range coder data, and that's more important.*/ - if(_this->offs+_this->end_offs>=_this->storage&&lerror=-1; - } - _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window; - } - } - } -} diff --git a/thirdparty/opus/celt/entenc.h b/thirdparty/opus/celt/entenc.h deleted file mode 100644 index 796bc4d572..0000000000 --- a/thirdparty/opus/celt/entenc.h +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(_entenc_H) -# define _entenc_H (1) -# include -# include "entcode.h" - -/*Initializes the encoder. - _buf: The buffer to store output bytes in. - _size: The size of the buffer, in chars.*/ -void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size); -/*Encodes a symbol given its frequency information. - The frequency information must be discernable by the decoder, assuming it - has read only the previous symbols from the stream. - It is allowable to change the frequency information, or even the entire - source alphabet, so long as the decoder can tell from the context of the - previously encoded information that it is supposed to do so as well. - _fl: The cumulative frequency of all symbols that come before the one to be - encoded. - _fh: The cumulative frequency of all symbols up to and including the one to - be encoded. - Together with _fl, this defines the range [_fl,_fh) in which the - decoded value will fall. - _ft: The sum of the frequencies of all the symbols*/ -void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft); - -/*Equivalent to ec_encode() with _ft==1<<_bits.*/ -void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits); - -/* Encode a bit that has a 1/(1<<_logp) probability of being a one */ -void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp); - -/*Encodes a symbol given an "inverse" CDF table. - _s: The index of the symbol to encode. - _icdf: The "inverse" CDF, such that symbol _s falls in the range - [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb. - The values must be monotonically non-increasing, and the last value - must be 0. - _ftb: The number of bits of precision in the cumulative distribution.*/ -void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb); - -/*Encodes a raw unsigned integer in the stream. - _fl: The integer to encode. - _ft: The number of integers that can be encoded (one more than the max). - This must be at least one, and no more than 2**32-1.*/ -void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft); - -/*Encodes a sequence of raw bits in the stream. - _fl: The bits to encode. - _ftb: The number of bits to encode. - This must be between 1 and 25, inclusive.*/ -void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb); - -/*Overwrites a few bits at the very start of an existing stream, after they - have already been encoded. - This makes it possible to have a few flags up front, where it is easy for - decoders to access them without parsing the whole stream, even if their - values are not determined until late in the encoding process, without having - to buffer all the intermediate symbols in the encoder. - In order for this to work, at least _nbits bits must have already been - encoded using probabilities that are an exact power of two. - The encoder can verify the number of encoded bits is sufficient, but cannot - check this latter condition. - _val: The bits to encode (in the least _nbits significant bits). - They will be decoded in order from most-significant to least. - _nbits: The number of bits to overwrite. - This must be no more than 8.*/ -void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits); - -/*Compacts the data to fit in the target size. - This moves up the raw bits at the end of the current buffer so they are at - the end of the new buffer size. - The caller must ensure that the amount of data that's already been written - will fit in the new size. - _size: The number of bytes in the new buffer. - This must be large enough to contain the bits already written, and - must be no larger than the existing size.*/ -void ec_enc_shrink(ec_enc *_this,opus_uint32 _size); - -/*Indicates that there are no more symbols to encode. - All reamining output bytes are flushed to the output buffer. - ec_enc_init() must be called before the encoder can be used again.*/ -void ec_enc_done(ec_enc *_this); - -#endif diff --git a/thirdparty/opus/celt/fixed_debug.h b/thirdparty/opus/celt/fixed_debug.h deleted file mode 100644 index d28227f5dc..0000000000 --- a/thirdparty/opus/celt/fixed_debug.h +++ /dev/null @@ -1,784 +0,0 @@ -/* Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2012 Xiph.Org Foundation */ -/** - @file fixed_debug.h - @brief Fixed-point operations with debugging -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_DEBUG_H -#define FIXED_DEBUG_H - -#include -#include "opus_defines.h" - -#ifdef CELT_C -OPUS_EXPORT opus_int64 celt_mips=0; -#else -extern opus_int64 celt_mips; -#endif - -#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) -#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15)) - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16)) - -#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16) - -#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) -#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) - -#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768) -#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL) -#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1)) - -#define SHR(a,b) SHR32(a,b) -#define PSHR(a,b) PSHR32(a,b) - -static OPUS_INLINE short NEG16(int x) -{ - int res; - if (!VERIFY_SHORT(x)) - { - fprintf (stderr, "NEG16: input is not short: %d\n", (int)x); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = -x; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "NEG16: output is not short: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} -static OPUS_INLINE int NEG32(opus_int64 x) -{ - opus_int64 res; - if (!VERIFY_INT(x)) - { - fprintf (stderr, "NEG16: input is not int: %d\n", (int)x); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = -x; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "NEG16: output is not int: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__) -static OPUS_INLINE short EXTRACT16_(int x, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(x)) - { - fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = x; - celt_mips++; - return res; -} - -#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__) -static OPUS_INLINE int EXTEND32_(int x, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(x)) - { - fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = x; - celt_mips++; - return res; -} - -#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__) -static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a>>shift; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} -#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__) -static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a<>shift; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} -#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__) -static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a<>1))),shift)) -#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) - -#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a)))) -#define HALF16(x) (SHR16(x,1)) -#define HALF32(x) (SHR32(x,1)) - -//#define SHR(a,shift) ((a) >> (shift)) -//#define SHL(a,shift) ((a) << (shift)) - -#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__) -static OPUS_INLINE short ADD16_(int a, int b, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a+b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} - -#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__) -static OPUS_INLINE short SUB16_(int a, int b, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a-b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} - -#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a+b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a-b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#undef UADD32 -#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line) -{ - opus_uint64 res; - if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) - { - fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a+b; - if (!VERIFY_UINT(res)) - { - fprintf (stderr, "UADD32: output is not uint32: %llu in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#undef USUB32 -#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line) -{ - opus_uint64 res; - if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) - { - fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (a=((opus_val32)(1)<<(15+Q))) - { - fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = (((opus_int64)a)*(opus_int64)b) >> Q; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (Q==15) - celt_mips+=3; - else - celt_mips+=4; - return res; -} - -#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__) -static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (ABS32(b)>=((opus_int64)(1)<<(15+Q))) - { - fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<>1))>> Q; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (Q==15) - celt_mips+=4; - else - celt_mips+=5; - return res; -} - -#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) -#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) -#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b)))) - -static OPUS_INLINE int SATURATE(int a, int b) -{ - if (a>b) - a=b; - if (a<-b) - a = -b; - celt_mips+=3; - return a; -} - -static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a) -{ - celt_mips+=3; - if (a>32767) - return 32767; - else if (a<-32768) - return -32768; - else return a; -} - -static OPUS_INLINE int MULT16_16_Q11_32(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 11; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=3; - return res; -} -static OPUS_INLINE short MULT16_16_Q13(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 13; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=3; - return res; -} -static OPUS_INLINE short MULT16_16_Q14(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 14; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=3; - return res; -} - -#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__) -static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 15; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=1; - return res; -} - -static OPUS_INLINE short MULT16_16_P13(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res += 4096; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res >>= 13; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=4; - return res; -} -static OPUS_INLINE short MULT16_16_P14(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res += 8192; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res >>= 14; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=4; - return res; -} -static OPUS_INLINE short MULT16_16_P15(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res += 16384; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res >>= 15; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__) - -static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (b==0) - { - fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - return 0; - } - if (!VERIFY_INT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a/b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line); - if (res>32767) - res = 32767; - if (res<-32768) - res = -32768; -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=35; - return res; -} - -#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (b==0) - { - fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - return 0; - } - - if (!VERIFY_INT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a/b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=70; - return res; -} - -static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) -{ - x = PSHR32(x, SIG_SHIFT); - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return EXTRACT16(x); -} -#define SIG2WORD16(x) (SIG2WORD16_generic(x)) - - -#undef PRINT_MIPS -#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); - -#endif diff --git a/thirdparty/opus/celt/fixed_generic.h b/thirdparty/opus/celt/fixed_generic.h deleted file mode 100644 index 1cfd6d6989..0000000000 --- a/thirdparty/opus/celt/fixed_generic.h +++ /dev/null @@ -1,167 +0,0 @@ -/* Copyright (C) 2007-2009 Xiph.Org Foundation - Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2008 CSIRO */ -/** - @file fixed_generic.h - @brief Generic fixed-point operations -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_GENERIC_H -#define FIXED_GENERIC_H - -/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */ -#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16)) -#else -#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) -#endif - -/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16)) -#else -#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) -#endif - -/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15)) -#else -#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) -#endif - -/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31)) -#else -#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) -#endif - -/** Compile-time conversion of float constant to 16-bit value */ -#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) - -/** Compile-time conversion of float constant to 32-bit value */ -#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) - -/** Negate a 16-bit value */ -#define NEG16(x) (-(x)) -/** Negate a 32-bit value */ -#define NEG32(x) (-(x)) - -/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */ -#define EXTRACT16(x) ((opus_val16)(x)) -/** Change a 16-bit value into a 32-bit value */ -#define EXTEND32(x) ((opus_val32)(x)) - -/** Arithmetic shift-right of a 16-bit value */ -#define SHR16(a,shift) ((a) >> (shift)) -/** Arithmetic shift-left of a 16-bit value */ -#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift))) -/** Arithmetic shift-right of a 32-bit value */ -#define SHR32(a,shift) ((a) >> (shift)) -/** Arithmetic shift-left of a 32-bit value */ -#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift))) - -/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */ -#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift)) -/** 32-bit arithmetic shift right where the argument can be negative */ -#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) - -/** "RAW" macros, should not be used outside of this header file */ -#define SHR(a,shift) ((a) >> (shift)) -#define SHL(a,shift) SHL32(a,shift) -#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) -#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) - -#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x))) - -/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ -#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) -/** Divide by two */ -#define HALF16(x) (SHR16(x,1)) -#define HALF32(x) (SHR32(x,1)) - -/** Add two 16-bit values */ -#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b))) -/** Subtract two 16-bit values */ -#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b)) -/** Add two 32-bit values */ -#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b)) -/** Subtract two 32-bit values */ -#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b)) - -/** 16x16 multiplication where the result fits in 16 bits */ -#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b)))) - -/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */ -/** 16x16 multiplication where the result fits in 32 bits */ -#define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b))) - -/** 16x16 multiply-add where the result fits in 32 bits */ -#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b)))) -/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. - b must fit in 31 bits. - Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) - -/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. - Results fits in 32 bits */ -#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) - -#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) -#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) -#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) -#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) -#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) - -#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13)) -#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14)) -#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15)) - -/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */ -#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b)))) - -/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ -#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) - -#if defined(MIPSr1_ASM) -#include "mips/fixed_generic_mipsr1.h" -#endif - -static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) -{ - x = PSHR32(x, SIG_SHIFT); - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return EXTRACT16(x); -} -#define SIG2WORD16(x) (SIG2WORD16_generic(x)) - -#endif diff --git a/thirdparty/opus/celt/float_cast.h b/thirdparty/opus/celt/float_cast.h deleted file mode 100644 index ed5a39b543..0000000000 --- a/thirdparty/opus/celt/float_cast.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (C) 2001 Erik de Castro Lopo */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Version 1.1 */ - -#ifndef FLOAT_CAST_H -#define FLOAT_CAST_H - - -#include "arch.h" - -/*============================================================================ -** On Intel Pentium processors (especially PIII and probably P4), converting -** from float to int is very slow. To meet the C specs, the code produced by -** most C compilers targeting Pentium needs to change the FPU rounding mode -** before the float to int conversion is performed. -** -** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It -** is this flushing of the pipeline which is so slow. -** -** Fortunately the ISO C99 specifications define the functions lrint, lrintf, -** llrint and llrintf which fix this problem as a side effect. -** -** On Unix-like systems, the configure process should have detected the -** presence of these functions. If they weren't found we have to replace them -** here with a standard C cast. -*/ - -/* -** The C99 prototypes for lrint and lrintf are as follows: -** -** long int lrintf (float x) ; -** long int lrint (double x) ; -*/ - -/* The presence of the required functions are detected during the configure -** process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in -** the config.h file. -*/ - -#if (HAVE_LRINTF) - -/* These defines enable functionality introduced with the 1999 ISO C -** standard. They must be defined before the inclusion of math.h to -** engage them. If optimisation is enabled, these functions will be -** inlined. With optimisation switched off, you have to link in the -** maths library using -lm. -*/ - -#define _ISOC9X_SOURCE 1 -#define _ISOC99_SOURCE 1 - -#define __USE_ISOC9X 1 -#define __USE_ISOC99 1 - -#include -#define float2int(x) lrintf(x) - -#elif (defined(HAVE_LRINT)) - -#define _ISOC9X_SOURCE 1 -#define _ISOC99_SOURCE 1 - -#define __USE_ISOC9X 1 -#define __USE_ISOC99 1 - -#include -#define float2int(x) lrint(x) - -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64) - #include - - __inline long int float2int(float value) - { - return _mm_cvtss_si32(_mm_load_ss(&value)); - } -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86) - #include - - /* Win32 doesn't seem to have these functions. - ** Therefore implement OPUS_INLINE versions of these functions here. - */ - - __inline long int - float2int (float flt) - { int intgr; - - _asm - { fld flt - fistp intgr - } ; - - return intgr ; - } - -#else - -#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) - /* supported by gcc in C99 mode, but not by all other compilers */ - #warning "Don't have the functions lrint() and lrintf ()." - #warning "Replacing these functions with a standard C cast." -#endif /* __STDC_VERSION__ >= 199901L */ - #include - #define float2int(flt) ((int)(floor(.5+flt))) -#endif - -#ifndef DISABLE_FLOAT_API -static OPUS_INLINE opus_int16 FLOAT2INT16(float x) -{ - x = x*CELT_SIG_SCALE; - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return (opus_int16)float2int(x); -} -#endif /* DISABLE_FLOAT_API */ - -#endif /* FLOAT_CAST_H */ diff --git a/thirdparty/opus/celt/kiss_fft.c b/thirdparty/opus/celt/kiss_fft.c deleted file mode 100644 index 1f8fd05321..0000000000 --- a/thirdparty/opus/celt/kiss_fft.c +++ /dev/null @@ -1,604 +0,0 @@ -/*Copyright (c) 2003-2004, Mark Borgerding - Lots of modifications by Jean-Marc Valin - Copyright (c) 2005-2007, Xiph.Org Foundation - Copyright (c) 2008, Xiph.Org Foundation, CSIRO - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -/* This code is originally from Mark Borgerding's KISS-FFT but has been - heavily modified to better suit Opus */ - -#ifndef SKIP_CONFIG_H -# ifdef HAVE_CONFIG_H -# include "config.h" -# endif -#endif - -#include "_kiss_fft_guts.h" -#include "arch.h" -#include "os_support.h" -#include "mathops.h" -#include "stack_alloc.h" - -/* The guts header contains all the multiplication and addition macros that are defined for - complex numbers. It also delares the kf_ internal functions. -*/ - -static void kf_bfly2( - kiss_fft_cpx * Fout, - int m, - int N - ) -{ - kiss_fft_cpx * Fout2; - int i; - (void)m; -#ifdef CUSTOM_MODES - if (m==1) - { - celt_assert(m==1); - for (i=0;itwiddles; - /* m is guaranteed to be a multiple of 4. */ - for (j=0;jtwiddles[fstride*m]; -#endif - for (i=0;itwiddles; - /* For non-custom modes, m is guaranteed to be a multiple of 4. */ - k=m; - do { - - C_MUL(scratch[1],Fout[m] , *tw1); - C_MUL(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - } while(--k); - } -} - - -#ifndef OVERRIDE_kf_bfly5 -static void kf_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int i, u; - kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - -#ifdef FIXED_POINT - ya.r = 10126; - ya.i = -31164; - yb.r = -26510; - yb.i = -19261; -#else - ya = st->twiddles[fstride*m]; - yb = st->twiddles[fstride*2*m]; -#endif - tw=st->twiddles; - - for (i=0;ir += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); - scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); - scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} -#endif /* OVERRIDE_kf_bfly5 */ - - -#endif - - -#ifdef CUSTOM_MODES - -static -void compute_bitrev_table( - int Fout, - opus_int16 *f, - const size_t fstride, - int in_stride, - opus_int16 * factors, - const kiss_fft_state *st - ) -{ - const int p=*factors++; /* the radix */ - const int m=*factors++; /* stage's fft length/p */ - - /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ - if (m==1) - { - int j; - for (j=0;j32000 || (opus_int32)p*(opus_int32)p > n) - p = n; /* no more factors, skip to end */ - } - n /= p; -#ifdef RADIX_TWO_ONLY - if (p!=2 && p != 4) -#else - if (p>5) -#endif - { - return 0; - } - facbuf[2*stages] = p; - if (p==2 && stages > 1) - { - facbuf[2*stages] = 4; - facbuf[2] = 2; - } - stages++; - } while (n > 1); - n = nbak; - /* Reverse the order to get the radix 4 at the end, so we can use the - fast degenerate case. It turns out that reversing the order also - improves the noise behaviour. */ - for (i=0;i= memneeded) - st = (kiss_fft_state*)mem; - *lenmem = memneeded; - } - if (st) { - opus_int16 *bitrev; - kiss_twiddle_cpx *twiddles; - - st->nfft=nfft; -#ifdef FIXED_POINT - st->scale_shift = celt_ilog2(st->nfft); - if (st->nfft == 1<scale_shift) - st->scale = Q15ONE; - else - st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); -#else - st->scale = 1.f/nfft; -#endif - if (base != NULL) - { - st->twiddles = base->twiddles; - st->shift = 0; - while (st->shift < 32 && nfft<shift != base->nfft) - st->shift++; - if (st->shift>=32) - goto fail; - } else { - st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); - compute_twiddles(twiddles, nfft); - st->shift = -1; - } - if (!kf_factor(nfft,st->factors)) - { - goto fail; - } - - /* bitrev */ - st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); - if (st->bitrev==NULL) - goto fail; - compute_bitrev_table(0, bitrev, 1,1, st->factors,st); - - /* Initialize architecture specific fft parameters */ - if (opus_fft_alloc_arch(st, arch)) - goto fail; - } - return st; -fail: - opus_fft_free(st, arch); - return NULL; -} - -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) -{ - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); -} - -void opus_fft_free_arch_c(kiss_fft_state *st) { - (void)st; -} - -void opus_fft_free(const kiss_fft_state *cfg, int arch) -{ - if (cfg) - { - opus_fft_free_arch((kiss_fft_state *)cfg, arch); - opus_free((opus_int16*)cfg->bitrev); - if (cfg->shift < 0) - opus_free((kiss_twiddle_cpx*)cfg->twiddles); - opus_free((kiss_fft_state*)cfg); - } -} - -#endif /* CUSTOM_MODES */ - -void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) -{ - int m2, m; - int p; - int L; - int fstride[MAXFACTORS]; - int i; - int shift; - - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) - { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - kf_bfly2(fout, m, fstride[i]); - break; - case 4: - kf_bfly4(fout,fstride[i]<scale_shift-1; -#endif - scale = st->scale; - - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;infft;i++) - { - kiss_fft_cpx x = fin[i]; - fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); - fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); - } - opus_fft_impl(st, fout); -} - - -void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) -{ - int i; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;infft;i++) - fout[st->bitrev[i]] = fin[i]; - for (i=0;infft;i++) - fout[i].i = -fout[i].i; - opus_fft_impl(st, fout); - for (i=0;infft;i++) - fout[i].i = -fout[i].i; -} diff --git a/thirdparty/opus/celt/kiss_fft.h b/thirdparty/opus/celt/kiss_fft.h deleted file mode 100644 index bffa2bfad6..0000000000 --- a/thirdparty/opus/celt/kiss_fft.h +++ /dev/null @@ -1,200 +0,0 @@ -/*Copyright (c) 2003-2004, Mark Borgerding - Lots of modifications by Jean-Marc Valin - Copyright (c) 2005-2007, Xiph.Org Foundation - Copyright (c) 2008, Xiph.Org Foundation, CSIRO - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_H -#define KISS_FFT_H - -#include -#include -#include "arch.h" -#include "cpu_support.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef USE_SIMD -# include -# define kiss_fft_scalar __m128 -#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) -#else -#define KISS_FFT_MALLOC opus_alloc -#endif - -#ifdef FIXED_POINT -#include "arch.h" - -# define kiss_fft_scalar opus_int32 -# define kiss_twiddle_scalar opus_int16 - - -#else -# ifndef kiss_fft_scalar -/* default is float */ -# define kiss_fft_scalar float -# define kiss_twiddle_scalar float -# define KF_SUFFIX _celt_single -# endif -#endif - -typedef struct { - kiss_fft_scalar r; - kiss_fft_scalar i; -}kiss_fft_cpx; - -typedef struct { - kiss_twiddle_scalar r; - kiss_twiddle_scalar i; -}kiss_twiddle_cpx; - -#define MAXFACTORS 8 -/* e.g. an fft of length 128 has 4 factors - as far as kissfft is concerned - 4*4*4*2 - */ - -typedef struct arch_fft_state{ - int is_supported; - void *priv; -} arch_fft_state; - -typedef struct kiss_fft_state{ - int nfft; - opus_val16 scale; -#ifdef FIXED_POINT - int scale_shift; -#endif - int shift; - opus_int16 factors[2*MAXFACTORS]; - const opus_int16 *bitrev; - const kiss_twiddle_cpx *twiddles; - arch_fft_state *arch_fft; -} kiss_fft_state; - -#if defined(HAVE_ARM_NE10) -#include "arm/fft_arm.h" -#endif - -/*typedef struct kiss_fft_state* kiss_fft_cfg;*/ - -/** - * opus_fft_alloc - * - * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. - * - * typical usage: kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL); - * - * The return value from fft_alloc is a cfg buffer used internally - * by the fft routine or NULL. - * - * If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc. - * The returned value should be free()d when done to avoid memory leaks. - * - * The state can be placed in a user supplied buffer 'mem': - * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, - * then the function places the cfg in mem and the size used in *lenmem - * and returns mem. - * - * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), - * then the function returns NULL and places the minimum cfg - * buffer size in *lenmem. - * */ - -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); - -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); - -/** - * opus_fft(cfg,in_out_buf) - * - * Perform an FFT on a complex input buffer. - * for a forward FFT, - * fin should be f[0] , f[1] , ... ,f[nfft-1] - * fout will be F[0] , F[1] , ... ,F[nfft-1] - * Note that each element is complex and can be accessed like - f[k].r and f[k].i - * */ -void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); - -void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); -void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); - -void opus_fft_free(const kiss_fft_state *cfg, int arch); - - -void opus_fft_free_arch_c(kiss_fft_state *st); -int opus_fft_alloc_arch_c(kiss_fft_state *st); - -#if !defined(OVERRIDE_OPUS_FFT) -/* Is run-time CPU detection enabled on this platform? */ -#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) - -extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( - kiss_fft_state *st); - -#define opus_fft_alloc_arch(_st, arch) \ - ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) - -extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( - kiss_fft_state *st); -#define opus_fft_free_arch(_st, arch) \ - ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) - -extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, kiss_fft_cpx *fout); -#define opus_fft(_cfg, _fin, _fout, arch) \ - ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) - -extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, kiss_fft_cpx *fout); -#define opus_ifft(_cfg, _fin, _fout, arch) \ - ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) - -#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ - -#define opus_fft_alloc_arch(_st, arch) \ - ((void)(arch), opus_fft_alloc_arch_c(_st)) - -#define opus_fft_free_arch(_st, arch) \ - ((void)(arch), opus_fft_free_arch_c(_st)) - -#define opus_fft(_cfg, _fin, _fout, arch) \ - ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) - -#define opus_ifft(_cfg, _fin, _fout, arch) \ - ((void)(arch), opus_ifft_c(_cfg, _fin, _fout)) - -#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ -#endif /* end if !defined(OVERRIDE_OPUS_FFT) */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/celt/laplace.c b/thirdparty/opus/celt/laplace.c deleted file mode 100644 index a7bca874b6..0000000000 --- a/thirdparty/opus/celt/laplace.c +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2007 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "laplace.h" -#include "mathops.h" - -/* The minimum probability of an energy delta (out of 32768). */ -#define LAPLACE_LOG_MINP (0) -#define LAPLACE_MINP (1<>15; -} - -void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay) -{ - unsigned fl; - int val = *value; - fl = 0; - if (val) - { - int s; - int i; - s = -(val<0); - val = (val+s)^s; - fl = fs; - fs = ec_laplace_get_freq1(fs, decay); - /* Search the decaying part of the PDF.*/ - for (i=1; fs > 0 && i < val; i++) - { - fs *= 2; - fl += fs+2*LAPLACE_MINP; - fs = (fs*(opus_int32)decay)>>15; - } - /* Everything beyond that has probability LAPLACE_MINP. */ - if (!fs) - { - int di; - int ndi_max; - ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP; - ndi_max = (ndi_max-s)>>1; - di = IMIN(val - i, ndi_max - 1); - fl += (2*di+1+s)*LAPLACE_MINP; - fs = IMIN(LAPLACE_MINP, 32768-fl); - *value = (i+di+s)^s; - } - else - { - fs += LAPLACE_MINP; - fl += fs&~s; - } - celt_assert(fl+fs<=32768); - celt_assert(fs>0); - } - ec_encode_bin(enc, fl, fl+fs, 15); -} - -int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay) -{ - int val=0; - unsigned fl; - unsigned fm; - fm = ec_decode_bin(dec, 15); - fl = 0; - if (fm >= fs) - { - val++; - fl = fs; - fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP; - /* Search the decaying part of the PDF.*/ - while(fs > LAPLACE_MINP && fm >= fl+2*fs) - { - fs *= 2; - fl += fs; - fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15; - fs += LAPLACE_MINP; - val++; - } - /* Everything beyond that has probability LAPLACE_MINP. */ - if (fs <= LAPLACE_MINP) - { - int di; - di = (fm-fl)>>(LAPLACE_LOG_MINP+1); - val += di; - fl += 2*di*LAPLACE_MINP; - } - if (fm < fl+fs) - val = -val; - else - fl += fs; - } - celt_assert(fl<32768); - celt_assert(fs>0); - celt_assert(fl<=fm); - celt_assert(fm>1; - b=1U<>=1; - bshift--; - } - while(bshift>=0); - return g; -} - -#ifdef FIXED_POINT - -opus_val32 frac_div32(opus_val32 a, opus_val32 b) -{ - opus_val16 rcp; - opus_val32 result, rem; - int shift = celt_ilog2(b)-29; - a = VSHR32(a,shift); - b = VSHR32(b,shift); - /* 16-bit reciprocal */ - rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); - result = MULT16_32_Q15(rcp, a); - rem = PSHR32(a,2)-MULT32_32_Q31(result, b); - result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); - if (result >= 536870912) /* 2^29 */ - return 2147483647; /* 2^31 - 1 */ - else if (result <= -536870912) /* -2^29 */ - return -2147483647; /* -2^31 */ - else - return SHL32(result, 2); -} - -/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ -opus_val16 celt_rsqrt_norm(opus_val32 x) -{ - opus_val16 n; - opus_val16 r; - opus_val16 r2; - opus_val16 y; - /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ - n = x-32768; - /* Get a rough initial guess for the root. - The optimal minimax quadratic approximation (using relative error) is - r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). - Coefficients here, and the final result r, are Q14.*/ - r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); - /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. - We can compute the result from n and r using Q15 multiplies with some - adjustment, carefully done to avoid overflow. - Range of y is [-1564,1594]. */ - r2 = MULT16_16_Q15(r, r); - y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); - /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). - This yields the Q14 reciprocal square root of the Q16 x, with a maximum - relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a - peak absolute error of 2.26591/16384. */ - return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, - SUB16(MULT16_16_Q15(y, 12288), 16384)))); -} - -/** Sqrt approximation (QX input, QX/2 output) */ -opus_val32 celt_sqrt(opus_val32 x) -{ - int k; - opus_val16 n; - opus_val32 rt; - static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; - if (x==0) - return 0; - else if (x>=1073741824) - return 32767; - k = (celt_ilog2(x)>>1)-7; - x = VSHR32(x, 2*k); - n = x-32768; - rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], - MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); - rt = VSHR32(rt,7-k); - return rt; -} - -#define L1 32767 -#define L2 -7651 -#define L3 8277 -#define L4 -626 - -static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) -{ - opus_val16 x2; - - x2 = MULT16_16_P15(x,x); - return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 - )))))))); -} - -#undef L1 -#undef L2 -#undef L3 -#undef L4 - -opus_val16 celt_cos_norm(opus_val32 x) -{ - x = x&0x0001ffff; - if (x>SHL32(EXTEND32(1), 16)) - x = SUB32(SHL32(EXTEND32(1), 17),x); - if (x&0x00007fff) - { - if (x0, "celt_rcp() only defined for positive values"); - i = celt_ilog2(x); - /* n is Q15 with range [0,1). */ - n = VSHR32(x,i-15)-32768; - /* Start with a linear approximation: - r = 1.8823529411764706-0.9411764705882353*n. - The coefficients and the result are Q14 in the range [15420,30840].*/ - r = ADD16(30840, MULT16_16_Q15(-15420, n)); - /* Perform two Newton iterations: - r -= r*((r*n)-1.Q15) - = r*((r*n)+(r-1.Q15)). */ - r = SUB16(r, MULT16_16_Q15(r, - ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); - /* We subtract an extra 1 in the second iteration to avoid overflow; it also - neatly compensates for truncation error in the rest of the process. */ - r = SUB16(r, ADD16(1, MULT16_16_Q15(r, - ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); - /* r is now the Q15 solution to 2/(n+1), with a maximum relative error - of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute - error of 1.24665/32768. */ - return VSHR32(EXTEND32(r),i-16); -} - -#endif diff --git a/thirdparty/opus/celt/mathops.h b/thirdparty/opus/celt/mathops.h deleted file mode 100644 index a0525a9610..0000000000 --- a/thirdparty/opus/celt/mathops.h +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (c) 2002-2008 Jean-Marc Valin - Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file mathops.h - @brief Various math functions -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef MATHOPS_H -#define MATHOPS_H - -#include "arch.h" -#include "entcode.h" -#include "os_support.h" - -/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */ -#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15) - -unsigned isqrt32(opus_uint32 _val); - -#ifndef OVERRIDE_CELT_MAXABS16 -static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) -{ - int i; - opus_val16 maxval = 0; - opus_val16 minval = 0; - for (i=0;i>23)-127; - in.i -= integer<<23; - frac = in.f - 1.5f; - frac = -0.41445418f + frac*(0.95909232f - + frac*(-0.33951290f + frac*0.16541097f)); - return 1+integer+frac; -} - -/** Base-2 exponential approximation (2^x). */ -static OPUS_INLINE float celt_exp2(float x) -{ - int integer; - float frac; - union { - float f; - opus_uint32 i; - } res; - integer = floor(x); - if (integer < -50) - return 0; - frac = x-integer; - /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */ - res.f = 0.99992522f + frac * (0.69583354f - + frac * (0.22606716f + 0.078024523f*frac)); - res.i = (res.i + (integer<<23)) & 0x7fffffff; - return res.f; -} - -#else -#define celt_log2(x) ((float)(1.442695040888963387*log(x))) -#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x))) -#endif - -#endif - -#ifdef FIXED_POINT - -#include "os_support.h" - -#ifndef OVERRIDE_CELT_ILOG2 -/** Integer log in base2. Undefined for zero and negative numbers */ -static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x) -{ - celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers"); - return EC_ILOG(x)-1; -} -#endif - - -/** Integer log in base2. Defined for zero, but not for negative numbers */ -static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x) -{ - return x <= 0 ? 0 : celt_ilog2(x); -} - -opus_val16 celt_rsqrt_norm(opus_val32 x); - -opus_val32 celt_sqrt(opus_val32 x); - -opus_val16 celt_cos_norm(opus_val32 x); - -/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */ -static OPUS_INLINE opus_val16 celt_log2(opus_val32 x) -{ - int i; - opus_val16 n, frac; - /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605, - 0.15530808010959576, -0.08556153059057618 */ - static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401}; - if (x==0) - return -32767; - i = celt_ilog2(x); - n = VSHR32(x,i-15)-32768-16384; - frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4])))))))); - return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT); -} - -/* - K0 = 1 - K1 = log(2) - K2 = 3-4*log(2) - K3 = 3*log(2) - 2 -*/ -#define D0 16383 -#define D1 22804 -#define D2 14819 -#define D3 10204 - -static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x) -{ - opus_val16 frac; - frac = SHL16(x, 4); - return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac)))))); -} -/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */ -static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x) -{ - int integer; - opus_val16 frac; - integer = SHR16(x,10); - if (integer>14) - return 0x7f000000; - else if (integer < -15) - return 0; - frac = celt_exp2_frac(x-SHL16(integer,10)); - return VSHR32(EXTEND32(frac), -integer-2); -} - -opus_val32 celt_rcp(opus_val32 x); - -#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b)) - -opus_val32 frac_div32(opus_val32 a, opus_val32 b); - -#define M1 32767 -#define M2 -21 -#define M3 -11943 -#define M4 4936 - -/* Atan approximation using a 4th order polynomial. Input is in Q15 format - and normalized by pi/4. Output is in Q15 format */ -static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x) -{ - return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x))))))); -} - -#undef M1 -#undef M2 -#undef M3 -#undef M4 - -/* atan2() approximation valid for positive input values */ -static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x) -{ - if (y < x) - { - opus_val32 arg; - arg = celt_div(SHL32(EXTEND32(y),15),x); - if (arg >= 32767) - arg = 32767; - return SHR16(celt_atan01(EXTRACT16(arg)),1); - } else { - opus_val32 arg; - arg = celt_div(SHL32(EXTEND32(x),15),y); - if (arg >= 32767) - arg = 32767; - return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1); - } -} - -#endif /* FIXED_POINT */ -#endif /* MATHOPS_H */ diff --git a/thirdparty/opus/celt/mdct.c b/thirdparty/opus/celt/mdct.c deleted file mode 100644 index 5315ad11a3..0000000000 --- a/thirdparty/opus/celt/mdct.c +++ /dev/null @@ -1,343 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* This is a simple MDCT implementation that uses a N/4 complex FFT - to do most of the work. It should be relatively straightforward to - plug in pretty much and FFT here. - - This replaces the Vorbis FFT (and uses the exact same API), which - was a bit too messy and that was ending up duplicating code - (might as well use the same FFT everywhere). - - The algorithm is similar to (and inspired from) Fabrice Bellard's - MDCT implementation in FFMPEG, but has differences in signs, ordering - and scaling in many places. -*/ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include "mdct.h" -#include "kiss_fft.h" -#include "_kiss_fft_guts.h" -#include -#include "os_support.h" -#include "mathops.h" -#include "stack_alloc.h" - -#if defined(MIPSr1_ASM) -#include "mips/mdct_mipsr1.h" -#endif - - -#ifdef CUSTOM_MODES - -int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) -{ - int i; - kiss_twiddle_scalar *trig; - int shift; - int N2=N>>1; - l->n = N; - l->maxshift = maxshift; - for (i=0;i<=maxshift;i++) - { - if (i==0) - l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); - else - l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); -#ifndef ENABLE_TI_DSPLIB55 - if (l->kfft[i]==NULL) - return 0; -#endif - } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); - if (l->trig==NULL) - return 0; - for (shift=0;shift<=maxshift;shift++) - { - /* We have enough points that sine isn't necessary */ -#if defined(FIXED_POINT) -#if 1 - for (i=0;i>= 1; - N >>= 1; - } - return 1; -} - -void clt_mdct_clear(mdct_lookup *l, int arch) -{ - int i; - for (i=0;i<=l->maxshift;i++) - opus_fft_free(l->kfft[i], arch); - opus_free((kiss_twiddle_scalar*)l->trig); -} - -#endif /* CUSTOM_MODES */ - -/* Forward MDCT trashes the input array */ -#ifndef OVERRIDE_clt_mdct_forward -void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_cpx, f2); - const kiss_fft_state *st = l->kfft[shift]; - const kiss_twiddle_scalar *trig; - opus_val16 scale; -#ifdef FIXED_POINT - /* Allows us to scale with MULT16_32_Q16(), which is faster than - MULT16_32_Q15() on ARM. */ - int scale_shift = st->scale_shift-1; -#endif - SAVE_STACK; - (void)arch; - scale = st->scale; - - N = l->n; - trig = l->trig; - for (i=0;i>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N4, kiss_fft_cpx); - - /* Consider the input to be composed of four blocks: [a, b, c, d] */ - /* Window, shuffle, fold */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); - const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<((overlap+3)>>2);i++) - { - /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); - *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - wp1 = window; - wp2 = window+overlap-1; - for(;i>2);i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = *xp2; - *yp++ = *xp1; - xp1+=2; - xp2-=2; - } - for(;ibitrev[i]] = yc; - } - } - - /* N/4 complex FFT, does not downscale anymore */ - opus_fft_impl(st, f2); - - /* Post-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_cpx * OPUS_RESTRICT fp = f2; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &trig[0]; - /* Temp pointers to make it really clear to the compiler what we're doing */ - for(i=0;ii,t[N4+i]) - S_MUL(fp->r,t[i]); - yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); - *yp1 = yr; - *yp2 = yi; - fp++; - yp1 += 2*stride; - yp2 -= 2*stride; - } - } - RESTORE_STACK; -} -#endif /* OVERRIDE_clt_mdct_forward */ - -#ifndef OVERRIDE_clt_mdct_backward -void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - const kiss_twiddle_scalar *trig; - (void) arch; - - N = l->n; - trig = l->trig; - for (i=0;i>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - /* Pre-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); - const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; - const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; - for(i=0;ikfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); - - /* Post-rotate and de-shuffle from both ends of the buffer at once to make - it in-place. */ - { - kiss_fft_scalar * yp0 = out+(overlap>>1); - kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &trig[0]; - /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the - middle pair will be computed twice. */ - for(i=0;i<(N4+1)>>1;i++) - { - kiss_fft_scalar re, im, yr, yi; - kiss_twiddle_scalar t0, t1; - /* We swap real and imag because we're using an FFT instead of an IFFT. */ - re = yp0[1]; - im = yp0[0]; - t0 = t[i]; - t1 = t[N4+i]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - /* We swap real and imag because we're using an FFT instead of an IFFT. */ - re = yp1[1]; - im = yp1[0]; - yp0[0] = yr; - yp1[1] = yi; - - t0 = t[(N4-i-1)]; - t1 = t[(N2-i-1)]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - yp1[0] = yr; - yp0[1] = yi; - yp0 += 2; - yp1 -= 2; - } - } - - /* Mirror on both sides for TDAC */ - { - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - - for(i = 0; i < overlap/2; i++) - { - kiss_fft_scalar x1, x2; - x1 = *xp1; - x2 = *yp1; - *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); - *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } -} -#endif /* OVERRIDE_clt_mdct_backward */ diff --git a/thirdparty/opus/celt/mdct.h b/thirdparty/opus/celt/mdct.h deleted file mode 100644 index 160ae4e0f3..0000000000 --- a/thirdparty/opus/celt/mdct.h +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* This is a simple MDCT implementation that uses a N/4 complex FFT - to do most of the work. It should be relatively straightforward to - plug in pretty much and FFT here. - - This replaces the Vorbis FFT (and uses the exact same API), which - was a bit too messy and that was ending up duplicating code - (might as well use the same FFT everywhere). - - The algorithm is similar to (and inspired from) Fabrice Bellard's - MDCT implementation in FFMPEG, but has differences in signs, ordering - and scaling in many places. -*/ - -#ifndef MDCT_H -#define MDCT_H - -#include "opus_defines.h" -#include "kiss_fft.h" -#include "arch.h" - -typedef struct { - int n; - int maxshift; - const kiss_fft_state *kfft[4]; - const kiss_twiddle_scalar * OPUS_RESTRICT trig; -} mdct_lookup; - -#if defined(HAVE_ARM_NE10) -#include "arm/mdct_arm.h" -#endif - - -int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch); -void clt_mdct_clear(mdct_lookup *l, int arch); - -/** Compute a forward MDCT and scale by 4/N, trashes the input array */ -void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, - int shift, int stride, int arch); - -/** Compute a backward MDCT (no scaling) and performs weighted overlap-add - (scales implicitly by 1/2) */ -void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, - int overlap, int shift, int stride, int arch); - -#if !defined(OVERRIDE_OPUS_MDCT) -/* Is run-time CPU detection enabled on this platform? */ -#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) - -extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])( - const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, - int overlap, int shift, int stride, int arch); - -#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - ((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \ - _window, _overlap, _shift, \ - _stride, _arch)) - -extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])( - const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, - int overlap, int shift, int stride, int arch); - -#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - (*CLT_MDCT_BACKWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \ - _window, _overlap, _shift, \ - _stride, _arch) - -#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */ - -#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) - -#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) - -#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */ -#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */ - -#endif diff --git a/thirdparty/opus/celt/mfrngcod.h b/thirdparty/opus/celt/mfrngcod.h deleted file mode 100644 index 809152a59a..0000000000 --- a/thirdparty/opus/celt/mfrngcod.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2001-2008 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(_mfrngcode_H) -# define _mfrngcode_H (1) -# include "entcode.h" - -/*Constants used by the entropy encoder/decoder.*/ - -/*The number of bits to output at a time.*/ -# define EC_SYM_BITS (8) -/*The total number of bits in each of the state registers.*/ -# define EC_CODE_BITS (32) -/*The maximum symbol value.*/ -# define EC_SYM_MAX ((1U<>EC_SYM_BITS) -/*The number of bits available for the last, partial symbol in the code field.*/ -# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1) -#endif diff --git a/thirdparty/opus/celt/mips/celt_mipsr1.h b/thirdparty/opus/celt/mips/celt_mipsr1.h deleted file mode 100644 index e85661a661..0000000000 --- a/thirdparty/opus/celt/mips/celt_mipsr1.h +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef __CELT_MIPSR1_H__ -#define __CELT_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_C - -#include "os_support.h" -#include "mdct.h" -#include -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include -#include "celt_lpc.h" -#include "vq.h" - -#define OVERRIDE_comb_filter -void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, - opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, - const opus_val16 *window, int overlap, int arch) -{ - int i; - opus_val32 x0, x1, x2, x3, x4; - - (void)arch; - - /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ - opus_val16 g00, g01, g02, g10, g11, g12; - static const opus_val16 gains[3][3] = { - {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, - {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, - {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; - - if (g0==0 && g1==0) - { - /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ - if (x!=y) - OPUS_MOVE(y, x, N); - return; - } - - g00 = MULT16_16_P15(g0, gains[tapset0][0]); - g01 = MULT16_16_P15(g0, gains[tapset0][1]); - g02 = MULT16_16_P15(g0, gains[tapset0][2]); - g10 = MULT16_16_P15(g1, gains[tapset1][0]); - g11 = MULT16_16_P15(g1, gains[tapset1][1]); - g12 = MULT16_16_P15(g1, gains[tapset1][2]); - x1 = x[-T1+1]; - x2 = x[-T1 ]; - x3 = x[-T1-1]; - x4 = x[-T1-2]; - /* If the filter didn't change, we don't need the overlap */ - if (g0==g1 && T0==T1 && tapset0==tapset1) - overlap=0; - - for (i=0;itwiddles[fstride*m]; - yb = st->twiddles[fstride*2*m]; -#endif - - tw=st->twiddles; - - for (i=0;ir += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r); - - scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i); - scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r); - - scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i); - scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} - - -#endif /* KISS_FFT_MIPSR1_H */ diff --git a/thirdparty/opus/celt/mips/mdct_mipsr1.h b/thirdparty/opus/celt/mips/mdct_mipsr1.h deleted file mode 100644 index 2934dab776..0000000000 --- a/thirdparty/opus/celt/mips/mdct_mipsr1.h +++ /dev/null @@ -1,288 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* This is a simple MDCT implementation that uses a N/4 complex FFT - to do most of the work. It should be relatively straightforward to - plug in pretty much and FFT here. - - This replaces the Vorbis FFT (and uses the exact same API), which - was a bit too messy and that was ending up duplicating code - (might as well use the same FFT everywhere). - - The algorithm is similar to (and inspired from) Fabrice Bellard's - MDCT implementation in FFMPEG, but has differences in signs, ordering - and scaling in many places. -*/ -#ifndef __MDCT_MIPSR1_H__ -#define __MDCT_MIPSR1_H__ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include "mdct.h" -#include "kiss_fft.h" -#include "_kiss_fft_guts.h" -#include -#include "os_support.h" -#include "mathops.h" -#include "stack_alloc.h" - -/* Forward MDCT trashes the input array */ -#define OVERRIDE_clt_mdct_forward -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_cpx, f2); - const kiss_fft_state *st = l->kfft[shift]; - const kiss_twiddle_scalar *trig; - opus_val16 scale; -#ifdef FIXED_POINT - /* Allows us to scale with MULT16_32_Q16(), which is faster than - MULT16_32_Q15() on ARM. */ - int scale_shift = st->scale_shift-1; -#endif - - (void)arch; - - SAVE_STACK; - scale = st->scale; - - N = l->n; - trig = l->trig; - for (i=0;i>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N4, kiss_fft_cpx); - - /* Consider the input to be composed of four blocks: [a, b, c, d] */ - /* Window, shuffle, fold */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); - const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<((overlap+3)>>2);i++) - { - /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2); - *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - wp1 = window; - wp2 = window+overlap-1; - for(;i>2);i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = *xp2; - *yp++ = *xp1; - xp1+=2; - xp2-=2; - } - for(;ibitrev[i]] = yc; - } - } - - /* N/4 complex FFT, does not downscale anymore */ - opus_fft_impl(st, f2); - - /* Post-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_cpx * OPUS_RESTRICT fp = f2; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &trig[0]; - /* Temp pointers to make it really clear to the compiler what we're doing */ - for(i=0;ii,t[N4+i] , fp->r,t[i]); - yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]); - *yp1 = yr; - *yp2 = yi; - fp++; - yp1 += 2*stride; - yp2 -= 2*stride; - } - } - RESTORE_STACK; -} - -#define OVERRIDE_clt_mdct_backward -void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - const kiss_twiddle_scalar *trig; - - (void)arch; - - N = l->n; - trig = l->trig; - for (i=0;i>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - /* Pre-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); - const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; - const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; - for(i=0;ikfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); - - /* Post-rotate and de-shuffle from both ends of the buffer at once to make - it in-place. */ - { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &trig[0]; - /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the - middle pair will be computed twice. */ - for(i=0;i<(N4+1)>>1;i++) - { - kiss_fft_scalar re, im, yr, yi; - kiss_twiddle_scalar t0, t1; - /* We swap real and imag because we're using an FFT instead of an IFFT. */ - re = yp0[1]; - im = yp0[0]; - t0 = t[i]; - t1 = t[N4+i]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL_ADD(re,t0 , im,t1); - yi = S_MUL_SUB(re,t1 , im,t0); - /* We swap real and imag because we're using an FFT instead of an IFFT. */ - re = yp1[1]; - im = yp1[0]; - yp0[0] = yr; - yp1[1] = yi; - - t0 = t[(N4-i-1)]; - t1 = t[(N2-i-1)]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL_ADD(re,t0,im,t1); - yi = S_MUL_SUB(re,t1,im,t0); - yp1[0] = yr; - yp0[1] = yi; - yp0 += 2; - yp1 -= 2; - } - } - - /* Mirror on both sides for TDAC */ - { - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - - for(i = 0; i < overlap/2; i++) - { - kiss_fft_scalar x1, x2; - x1 = *xp1; - x2 = *yp1; - *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); - *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } -} -#endif /* __MDCT_MIPSR1_H__ */ diff --git a/thirdparty/opus/celt/mips/pitch_mipsr1.h b/thirdparty/opus/celt/mips/pitch_mipsr1.h deleted file mode 100644 index a9500aff58..0000000000 --- a/thirdparty/opus/celt/mips/pitch_mipsr1.h +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file pitch.h - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef PITCH_MIPSR1_H -#define PITCH_MIPSR1_H - -#define OVERRIDE_DUAL_INNER_PROD -static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2, int arch) -{ - int j; - opus_val32 xy01=0; - opus_val32 xy02=0; - - (void)arch; - - asm volatile("MULT $ac1, $0, $0"); - asm volatile("MULT $ac2, $0, $0"); - /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ - for (j=0;j=0;i--) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } -} - -#define OVERRIDE_renormalise_vector - -#define renormalise_vector(X, N, gain, arch) \ - (renormalise_vector_mips(X, N, gain, arch)) - -void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - opus_val32 E = EPSILON; - opus_val16 g; - opus_val32 t; - celt_norm *xptr = X; - int X0, X1; - - (void)arch; - - asm volatile("mult $ac1, $0, $0"); - asm volatile("MTLO %0, $ac1" : :"r" (E)); - /*if(N %4) - printf("error");*/ - for (i=0;i>1; -#endif - t = VSHR32(E, 2*(k-7)); - g = MULT16_16_P15(celt_rsqrt_norm(t),gain); - - xptr = X; - for (i=0;i= Fs) - break; - - /* Find where the linear part ends (i.e. where the spacing is more than min_width */ - for (lin=0;lin= res) - break; - - low = (bark_freq[lin]+res/2)/res; - high = nBark-lin; - *nbEBands = low+high; - eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2)); - - if (eBands==NULL) - return NULL; - - /* Linear spacing (min_width) */ - for (i=0;i0) - offset = eBands[low-1]*res - bark_freq[lin-1]; - /* Spacing follows critical bands */ - for (i=0;i frame_size) - eBands[*nbEBands] = frame_size; - for (i=1;i<*nbEBands-1;i++) - { - if (eBands[i+1]-eBands[i] < eBands[i]-eBands[i-1]) - { - eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2; - } - } - /* Remove any empty bands. */ - for (i=j=0;i<*nbEBands;i++) - if(eBands[i+1]>eBands[j]) - eBands[++j]=eBands[i+1]; - *nbEBands=j; - - for (i=1;i<*nbEBands;i++) - { - /* Every band must be smaller than the last band. */ - celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]); - /* Each band must be no larger than twice the size of the previous one. */ - celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1])); - } - - return eBands; -} - -static void compute_allocation_table(CELTMode *mode) -{ - int i, j; - unsigned char *allocVectors; - int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1; - - mode->nbAllocVectors = BITALLOC_SIZE; - allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands)); - if (allocVectors==NULL) - return; - - /* Check for standard mode */ - if (mode->Fs == 400*(opus_int32)mode->shortMdctSize) - { - for (i=0;inbEBands;i++) - allocVectors[i] = band_allocation[i]; - mode->allocVectors = allocVectors; - return; - } - /* If not the standard mode, interpolate */ - /* Compute per-codec-band allocation from per-critical-band matrix */ - for (i=0;inbEBands;j++) - { - int k; - for (k=0;k mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize) - break; - } - if (k>maxBands-1) - allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1]; - else { - opus_int32 a0, a1; - a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1]; - a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize; - allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1] - + a1*band_allocation[i*maxBands+k])/(a0+a1); - } - } - } - - /*printf ("\n"); - for (i=0;inbEBands;j++) - printf ("%d ", allocVectors[i*mode->nbEBands+j]); - printf ("\n"); - } - exit(0);*/ - - mode->allocVectors = allocVectors; -} - -#endif /* CUSTOM_MODES */ - -CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error) -{ - int i; -#ifdef CUSTOM_MODES - CELTMode *mode=NULL; - int res; - opus_val16 *window; - opus_int16 *logN; - int LM; - int arch = opus_select_arch(); - ALLOC_STACK; -#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA) - if (global_stack==NULL) - goto failure; -#endif -#endif - -#ifndef CUSTOM_MODES_ONLY - for (i=0;iFs && - (frame_size<shortMdctSize*static_mode_list[i]->nbShortMdcts) - { - if (error) - *error = OPUS_OK; - return (CELTMode*)static_mode_list[i]; - } - } - } -#endif /* CUSTOM_MODES_ONLY */ - -#ifndef CUSTOM_MODES - if (error) - *error = OPUS_BAD_ARG; - return NULL; -#else - - /* The good thing here is that permutation of the arguments will automatically be invalid */ - - if (Fs < 8000 || Fs > 96000) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - /* Frames of less than 1ms are not supported. */ - if ((opus_int32)frame_size*1000 < Fs) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - - if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0) - { - LM = 3; - } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0) - { - LM = 2; - } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0) - { - LM = 1; - } else - { - LM = 0; - } - - /* Shorts longer than 3.3ms are not supported. */ - if ((opus_int32)(frame_size>>LM)*300 > Fs) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - - mode = opus_alloc(sizeof(CELTMode)); - if (mode==NULL) - goto failure; - mode->Fs = Fs; - - /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis - is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should - approximate that. */ - if(Fs < 12000) /* 8 kHz */ - { - mode->preemph[0] = QCONST16(0.3500061035f, 15); - mode->preemph[1] = -QCONST16(0.1799926758f, 15); - mode->preemph[2] = QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */ - mode->preemph[3] = QCONST16(3.6765136719f, 13); - } else if(Fs < 24000) /* 16 kHz */ - { - mode->preemph[0] = QCONST16(0.6000061035f, 15); - mode->preemph[1] = -QCONST16(0.1799926758f, 15); - mode->preemph[2] = QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */ - mode->preemph[3] = QCONST16(2.2598876953f, 13); - } else if(Fs < 40000) /* 32 kHz */ - { - mode->preemph[0] = QCONST16(0.7799987793f, 15); - mode->preemph[1] = -QCONST16(0.1000061035f, 15); - mode->preemph[2] = QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */ - mode->preemph[3] = QCONST16(1.3333740234f, 13); - } else /* 48 kHz */ - { - mode->preemph[0] = QCONST16(0.8500061035f, 15); - mode->preemph[1] = QCONST16(0.0f, 15); - mode->preemph[2] = QCONST16(1.f, SIG_SHIFT); - mode->preemph[3] = QCONST16(1.f, 13); - } - - mode->maxLM = LM; - mode->nbShortMdcts = 1<shortMdctSize = frame_size/mode->nbShortMdcts; - res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize); - - mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands); - if (mode->eBands==NULL) - goto failure; -#if !defined(SMALL_FOOTPRINT) - /* Make sure we don't allocate a band larger than our PVQ table. - 208 should be enough, but let's be paranoid. */ - if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])< - 208) { - goto failure; - } -#endif - - mode->effEBands = mode->nbEBands; - while (mode->eBands[mode->effEBands] > mode->shortMdctSize) - mode->effEBands--; - - /* Overlap must be divisible by 4 */ - mode->overlap = ((mode->shortMdctSize>>2)<<2); - - compute_allocation_table(mode); - if (mode->allocVectors==NULL) - goto failure; - - window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16)); - if (window==NULL) - goto failure; - -#ifndef FIXED_POINT - for (i=0;ioverlap;i++) - window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)); -#else - for (i=0;ioverlap;i++) - window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)))); -#endif - mode->window = window; - - logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16)); - if (logN==NULL) - goto failure; - - for (i=0;inbEBands;i++) - logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES); - mode->logN = logN; - - compute_pulse_cache(mode, mode->maxLM); - - if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, - mode->maxLM, arch) == 0) - goto failure; - - if (error) - *error = OPUS_OK; - - return mode; -failure: - if (error) - *error = OPUS_ALLOC_FAIL; - if (mode!=NULL) - opus_custom_mode_destroy(mode); - return NULL; -#endif /* !CUSTOM_MODES */ -} - -#ifdef CUSTOM_MODES -void opus_custom_mode_destroy(CELTMode *mode) -{ - int arch = opus_select_arch(); - - if (mode == NULL) - return; -#ifndef CUSTOM_MODES_ONLY - { - int i; - for (i=0;ieBands); - opus_free((opus_int16*)mode->allocVectors); - - opus_free((opus_val16*)mode->window); - opus_free((opus_int16*)mode->logN); - - opus_free((opus_int16*)mode->cache.index); - opus_free((unsigned char*)mode->cache.bits); - opus_free((unsigned char*)mode->cache.caps); - clt_mdct_clear(&mode->mdct, arch); - - opus_free((CELTMode *)mode); -} -#endif diff --git a/thirdparty/opus/celt/modes.h b/thirdparty/opus/celt/modes.h deleted file mode 100644 index be813ccc8b..0000000000 --- a/thirdparty/opus/celt/modes.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef MODES_H -#define MODES_H - -#include "opus_types.h" -#include "celt.h" -#include "arch.h" -#include "mdct.h" -#include "entenc.h" -#include "entdec.h" - -#define MAX_PERIOD 1024 - -typedef struct { - int size; - const opus_int16 *index; - const unsigned char *bits; - const unsigned char *caps; -} PulseCache; - -/** Mode definition (opaque) - @brief Mode definition - */ -struct OpusCustomMode { - opus_int32 Fs; - int overlap; - - int nbEBands; - int effEBands; - opus_val16 preemph[4]; - const opus_int16 *eBands; /**< Definition for each "pseudo-critical band" */ - - int maxLM; - int nbShortMdcts; - int shortMdctSize; - - int nbAllocVectors; /**< Number of lines in the matrix below */ - const unsigned char *allocVectors; /**< Number of bits in each band for several rates */ - const opus_int16 *logN; - - const opus_val16 *window; - mdct_lookup mdct; - PulseCache cache; -}; - - -#endif diff --git a/thirdparty/opus/celt/opus_custom_demo.c b/thirdparty/opus/celt/opus_custom_demo.c deleted file mode 100644 index ae41c0de5a..0000000000 --- a/thirdparty/opus/celt/opus_custom_demo.c +++ /dev/null @@ -1,210 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_custom.h" -#include "arch.h" -#include -#include -#include -#include - -#define MAX_PACKET 1275 - -int main(int argc, char *argv[]) -{ - int err; - char *inFile, *outFile; - FILE *fin, *fout; - OpusCustomMode *mode=NULL; - OpusCustomEncoder *enc; - OpusCustomDecoder *dec; - int len; - opus_int32 frame_size, channels, rate; - int bytes_per_packet; - unsigned char data[MAX_PACKET]; - int complexity; -#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) - int i; - double rmsd = 0; -#endif - int count = 0; - opus_int32 skip; - opus_int16 *in, *out; - if (argc != 9 && argc != 8 && argc != 7) - { - fprintf (stderr, "Usage: test_opus_custom " - " [ [packet loss rate]] " - " \n"); - return 1; - } - - rate = (opus_int32)atol(argv[1]); - channels = atoi(argv[2]); - frame_size = atoi(argv[3]); - mode = opus_custom_mode_create(rate, frame_size, NULL); - if (mode == NULL) - { - fprintf(stderr, "failed to create a mode\n"); - return 1; - } - - bytes_per_packet = atoi(argv[4]); - if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET) - { - fprintf (stderr, "bytes per packet must be between 0 and %d\n", - MAX_PACKET); - return 1; - } - - inFile = argv[argc-2]; - fin = fopen(inFile, "rb"); - if (!fin) - { - fprintf (stderr, "Could not open input file %s\n", argv[argc-2]); - return 1; - } - outFile = argv[argc-1]; - fout = fopen(outFile, "wb+"); - if (!fout) - { - fprintf (stderr, "Could not open output file %s\n", argv[argc-1]); - fclose(fin); - return 1; - } - - enc = opus_custom_encoder_create(mode, channels, &err); - if (err != 0) - { - fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err)); - fclose(fin); - fclose(fout); - return 1; - } - dec = opus_custom_decoder_create(mode, channels, &err); - if (err != 0) - { - fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err)); - fclose(fin); - fclose(fout); - return 1; - } - opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip)); - - if (argc>7) - { - complexity=atoi(argv[5]); - opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity)); - } - - in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16)); - out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16)); - - while (!feof(fin)) - { - int ret; - err = fread(in, sizeof(short), frame_size*channels, fin); - if (feof(fin)) - break; - len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet); - if (len <= 0) - fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len)); - - /* This is for simulating bit errors */ -#if 0 - int errors = 0; - int eid = 0; - /* This simulates random bit error */ - for (i=0;i 0) - { - rmsd = sqrt(rmsd/(1.0*frame_size*channels*count)); - fprintf (stderr, "Error: encoder doesn't match decoder\n"); - fprintf (stderr, "RMS mismatch is %f\n", rmsd); - return 1; - } else { - fprintf (stderr, "Encoder matches decoder!!\n"); - } -#endif - return 0; -} - diff --git a/thirdparty/opus/celt/os_support.h b/thirdparty/opus/celt/os_support.h deleted file mode 100644 index a2171971e9..0000000000 --- a/thirdparty/opus/celt/os_support.h +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2007 Jean-Marc Valin - - File: os_support.h - This is the (tiny) OS abstraction layer. Aside from math.h, this is the - only place where system headers are allowed. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef OS_SUPPORT_H -#define OS_SUPPORT_H - -#ifdef CUSTOM_SUPPORT -# include "custom_support.h" -#endif - -#include "opus_types.h" -#include "opus_defines.h" - -#include -#include -#include - -/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */ -#ifndef OVERRIDE_OPUS_ALLOC -static OPUS_INLINE void *opus_alloc (size_t size) -{ - return malloc(size); -} -#endif - -/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */ -#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH -static OPUS_INLINE void *opus_alloc_scratch (size_t size) -{ - /* Scratch space doesn't need to be cleared */ - return opus_alloc(size); -} -#endif - -/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */ -#ifndef OVERRIDE_OPUS_FREE -static OPUS_INLINE void opus_free (void *ptr) -{ - free(ptr); -} -#endif - -/** Copy n elements from src to dst. The 0* term provides compile-time type checking */ -#ifndef OVERRIDE_OPUS_COPY -#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) -#endif - -/** Copy n elements from src to dst, allowing overlapping regions. The 0* term - provides compile-time type checking */ -#ifndef OVERRIDE_OPUS_MOVE -#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) -#endif - -/** Set n elements of dst to zero */ -#ifndef OVERRIDE_OPUS_CLEAR -#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) -#endif - -/*#ifdef __GNUC__ -#pragma GCC poison printf sprintf -#pragma GCC poison malloc free realloc calloc -#endif*/ - -#endif /* OS_SUPPORT_H */ - diff --git a/thirdparty/opus/celt/pitch.c b/thirdparty/opus/celt/pitch.c deleted file mode 100644 index bf46e7d562..0000000000 --- a/thirdparty/opus/celt/pitch.c +++ /dev/null @@ -1,557 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file pitch.c - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pitch.h" -#include "os_support.h" -#include "modes.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "celt_lpc.h" - -static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, - int max_pitch, int *best_pitch -#ifdef FIXED_POINT - , int yshift, opus_val32 maxcorr -#endif - ) -{ - int i, j; - opus_val32 Syy=1; - opus_val16 best_num[2]; - opus_val32 best_den[2]; -#ifdef FIXED_POINT - int xshift; - - xshift = celt_ilog2(maxcorr)-14; -#endif - - best_num[0] = -1; - best_num[1] = -1; - best_den[0] = 0; - best_den[1] = 0; - best_pitch[0] = 0; - best_pitch[1] = 1; - for (j=0;j0) - { - opus_val16 num; - opus_val32 xcorr16; - xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); -#ifndef FIXED_POINT - /* Considering the range of xcorr16, this should avoid both underflows - and overflows (inf) when squaring xcorr16 */ - xcorr16 *= 1e-12f; -#endif - num = MULT16_16_Q15(xcorr16,xcorr16); - if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) - { - if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) - { - best_num[1] = best_num[0]; - best_den[1] = best_den[0]; - best_pitch[1] = best_pitch[0]; - best_num[0] = num; - best_den[0] = Syy; - best_pitch[0] = i; - } else { - best_num[1] = num; - best_den[1] = Syy; - best_pitch[1] = i; - } - } - } - Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); - Syy = MAX32(1, Syy); - } -} - -static void celt_fir5(const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - opus_val16 *mem) -{ - int i; - opus_val16 num0, num1, num2, num3, num4; - opus_val32 mem0, mem1, mem2, mem3, mem4; - num0=num[0]; - num1=num[1]; - num2=num[2]; - num3=num[3]; - num4=num[4]; - mem0=mem[0]; - mem1=mem[1]; - mem2=mem[2]; - mem3=mem[3]; - mem4=mem[4]; - for (i=0;i>1;i++) - x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift); - x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift); - if (C==2) - { - for (i=1;i>1;i++) - x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); - x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); - } - - _celt_autocorr(x_lp, ac, NULL, 0, - 4, len>>1, arch); - - /* Noise floor -40 dB */ -#ifdef FIXED_POINT - ac[0] += SHR32(ac[0],13); -#else - ac[0] *= 1.0001f; -#endif - /* Lag windowing */ - for (i=1;i<=4;i++) - { - /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ -#ifdef FIXED_POINT - ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); -#else - ac[i] -= ac[i]*(.008f*i)*(.008f*i); -#endif - } - - _celt_lpc(lpc, ac, 4); - for (i=0;i<4;i++) - { - tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); - lpc[i] = MULT16_16_Q15(lpc[i], tmp); - } - /* Add a zero */ - lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT); - lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]); - lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); - lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); - lpc2[4] = MULT16_16_Q15(c1,lpc[3]); - celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); -} - -/* Pure C implementation. */ -#ifdef FIXED_POINT -opus_val32 -#else -void -#endif -#if defined(OVERRIDE_PITCH_XCORR) -celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch) -#else -celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch) -#endif -{ - -#if 0 /* This is a simple version of the pitch correlation that should work - well on DSPs like Blackfin and TI C5x/C6x */ - int i, j; -#ifdef FIXED_POINT - opus_val32 maxcorr=1; -#endif -#if !defined(OVERRIDE_PITCH_XCORR) - (void)arch; -#endif - for (i=0;i0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); - for (i=0;i0); - celt_assert(max_pitch>0); - lag = len+max_pitch; - - ALLOC(x_lp4, len>>2, opus_val16); - ALLOC(y_lp4, lag>>2, opus_val16); - ALLOC(xcorr, max_pitch>>1, opus_val32); - - /* Downsample by 2 again */ - for (j=0;j>2;j++) - x_lp4[j] = x_lp[2*j]; - for (j=0;j>2;j++) - y_lp4[j] = y[2*j]; - -#ifdef FIXED_POINT - xmax = celt_maxabs16(x_lp4, len>>2); - ymax = celt_maxabs16(y_lp4, lag>>2); - shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; - if (shift>0) - { - for (j=0;j>2;j++) - x_lp4[j] = SHR16(x_lp4[j], shift); - for (j=0;j>2;j++) - y_lp4[j] = SHR16(y_lp4[j], shift); - /* Use double the shift for a MAC */ - shift *= 2; - } else { - shift = 0; - } -#endif - - /* Coarse search with 4x decimation */ - -#ifdef FIXED_POINT - maxcorr = -#endif - celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch); - - find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch -#ifdef FIXED_POINT - , 0, maxcorr -#endif - ); - - /* Finer search with 2x decimation */ -#ifdef FIXED_POINT - maxcorr=1; -#endif - for (i=0;i>1;i++) - { - opus_val32 sum; - xcorr[i] = 0; - if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) - continue; -#ifdef FIXED_POINT - sum = 0; - for (j=0;j>1;j++) - sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); -#else - sum = celt_inner_prod_c(x_lp, y+i, len>>1); -#endif - xcorr[i] = MAX32(-1, sum); -#ifdef FIXED_POINT - maxcorr = MAX32(maxcorr, sum); -#endif - } - find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch -#ifdef FIXED_POINT - , shift+1, maxcorr -#endif - ); - - /* Refine by pseudo-interpolation */ - if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1) - { - opus_val32 a, b, c; - a = xcorr[best_pitch[0]-1]; - b = xcorr[best_pitch[0]]; - c = xcorr[best_pitch[0]+1]; - if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a)) - offset = 1; - else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c)) - offset = -1; - else - offset = 0; - } else { - offset = 0; - } - *pitch = 2*best_pitch[0]-offset; - - RESTORE_STACK; -} - -#ifdef FIXED_POINT -static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) -{ - opus_val32 x2y2; - int sx, sy, shift; - opus_val32 g; - opus_val16 den; - if (xy == 0 || xx == 0 || yy == 0) - return 0; - sx = celt_ilog2(xx)-14; - sy = celt_ilog2(yy)-14; - shift = sx + sy; - x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy)); - if (shift & 1) { - if (x2y2 < 32768) - { - x2y2 <<= 1; - shift--; - } else { - x2y2 >>= 1; - shift++; - } - } - den = celt_rsqrt_norm(x2y2); - g = MULT16_32_Q15(den, xy); - g = VSHR32(g, (shift>>1)-1); - return EXTRACT16(MIN32(g, Q15ONE)); -} -#else -static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) -{ - return xy/celt_sqrt(1+xx*yy); -} -#endif - -static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; -opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) -{ - int k, i, T, T0; - opus_val16 g, g0; - opus_val16 pg; - opus_val32 xy,xx,yy,xy2; - opus_val32 xcorr[3]; - opus_val32 best_xy, best_yy; - int offset; - int minperiod0; - VARDECL(opus_val32, yy_lookup); - SAVE_STACK; - - minperiod0 = minperiod; - maxperiod /= 2; - minperiod /= 2; - *T0_ /= 2; - prev_period /= 2; - N /= 2; - x += maxperiod; - if (*T0_>=maxperiod) - *T0_=maxperiod-1; - - T = T0 = *T0_; - ALLOC(yy_lookup, maxperiod+1, opus_val32); - dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch); - yy_lookup[0] = xx; - yy=xx; - for (i=1;i<=maxperiod;i++) - { - yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]); - yy_lookup[i] = MAX32(0, yy); - } - yy = yy_lookup[T0]; - best_xy = xy; - best_yy = yy; - g = g0 = compute_pitch_gain(xy, xx, yy); - /* Look for any pitch at T/k */ - for (k=2;k<=15;k++) - { - int T1, T1b; - opus_val16 g1; - opus_val16 cont=0; - opus_val16 thresh; - T1 = celt_udiv(2*T0+k, 2*k); - if (T1 < minperiod) - break; - /* Look for another strong correlation at T1b */ - if (k==2) - { - if (T1+T0>maxperiod) - T1b = T0; - else - T1b = T0+T1; - } else - { - T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); - } - dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); - xy = HALF32(xy + xy2); - yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]); - g1 = compute_pitch_gain(xy, xx, yy); - if (abs(T1-prev_period)<=1) - cont = prev_gain; - else if (abs(T1-prev_period)<=2 && 5*k*k < T0) - cont = HALF16(prev_gain); - else - cont = 0; - thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); - /* Bias against very high pitch (very short period) to avoid false-positives - due to short-term correlation */ - if (T1<3*minperiod) - thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont); - else if (T1<2*minperiod) - thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont); - if (g1 > thresh) - { - best_xy = xy; - best_yy = yy; - T = T1; - g = g1; - } - } - best_xy = MAX32(0, best_xy); - if (best_yy <= best_xy) - pg = Q15ONE; - else - pg = SHR32(frac_div32(best_xy,best_yy+1),16); - - for (k=0;k<3;k++) - xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch); - if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) - offset = 1; - else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) - offset = -1; - else - offset = 0; - if (pg > g) - pg = g; - *T0_ = 2*T+offset; - - if (*T0_=3); - y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ - y_0=*y++; - y_1=*y++; - y_2=*y++; - for (j=0;j -#include "os_support.h" -#include "arch.h" -#include "mathops.h" -#include "stack_alloc.h" -#include "rate.h" - -#ifdef FIXED_POINT -/* Mean energy in each band quantized in Q4 */ -const signed char eMeans[25] = { - 103,100, 92, 85, 81, - 77, 72, 70, 78, 75, - 73, 71, 78, 74, 69, - 72, 70, 74, 76, 71, - 60, 60, 60, 60, 60 -}; -#else -/* Mean energy in each band quantized in Q4 and converted back to float */ -const opus_val16 eMeans[25] = { - 6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f, - 4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f, - 4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f, - 4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f, - 3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f -}; -#endif -/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */ -#ifdef FIXED_POINT -static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 16384}; -static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554}; -static const opus_val16 beta_intra = 4915; -#else -static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.}; -static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.}; -static const opus_val16 beta_intra = 4915/32768.; -#endif - -/*Parameters of the Laplace-like probability models used for the coarse energy. - There is one pair of parameters for each frame size, prediction type - (inter/intra), and band number. - The first number of each pair is the probability of 0, and the second is the - decay rate, both in Q8 precision.*/ -static const unsigned char e_prob_model[4][2][42] = { - /*120 sample frames.*/ - { - /*Inter*/ - { - 72, 127, 65, 129, 66, 128, 65, 128, 64, 128, 62, 128, 64, 128, - 64, 128, 92, 78, 92, 79, 92, 78, 90, 79, 116, 41, 115, 40, - 114, 40, 132, 26, 132, 26, 145, 17, 161, 12, 176, 10, 177, 11 - }, - /*Intra*/ - { - 24, 179, 48, 138, 54, 135, 54, 132, 53, 134, 56, 133, 55, 132, - 55, 132, 61, 114, 70, 96, 74, 88, 75, 88, 87, 74, 89, 66, - 91, 67, 100, 59, 108, 50, 120, 40, 122, 37, 97, 43, 78, 50 - } - }, - /*240 sample frames.*/ - { - /*Inter*/ - { - 83, 78, 84, 81, 88, 75, 86, 74, 87, 71, 90, 73, 93, 74, - 93, 74, 109, 40, 114, 36, 117, 34, 117, 34, 143, 17, 145, 18, - 146, 19, 162, 12, 165, 10, 178, 7, 189, 6, 190, 8, 177, 9 - }, - /*Intra*/ - { - 23, 178, 54, 115, 63, 102, 66, 98, 69, 99, 74, 89, 71, 91, - 73, 91, 78, 89, 86, 80, 92, 66, 93, 64, 102, 59, 103, 60, - 104, 60, 117, 52, 123, 44, 138, 35, 133, 31, 97, 38, 77, 45 - } - }, - /*480 sample frames.*/ - { - /*Inter*/ - { - 61, 90, 93, 60, 105, 42, 107, 41, 110, 45, 116, 38, 113, 38, - 112, 38, 124, 26, 132, 27, 136, 19, 140, 20, 155, 14, 159, 16, - 158, 18, 170, 13, 177, 10, 187, 8, 192, 6, 175, 9, 159, 10 - }, - /*Intra*/ - { - 21, 178, 59, 110, 71, 86, 75, 85, 84, 83, 91, 66, 88, 73, - 87, 72, 92, 75, 98, 72, 105, 58, 107, 54, 115, 52, 114, 55, - 112, 56, 129, 51, 132, 40, 150, 33, 140, 29, 98, 35, 77, 42 - } - }, - /*960 sample frames.*/ - { - /*Inter*/ - { - 42, 121, 96, 66, 108, 43, 111, 40, 117, 44, 123, 32, 120, 36, - 119, 33, 127, 33, 134, 34, 139, 21, 147, 23, 152, 20, 158, 25, - 154, 26, 166, 21, 173, 16, 184, 13, 184, 10, 150, 13, 139, 15 - }, - /*Intra*/ - { - 22, 178, 63, 114, 74, 82, 84, 83, 92, 82, 103, 62, 96, 72, - 96, 67, 101, 73, 107, 72, 113, 55, 118, 52, 125, 52, 118, 52, - 117, 55, 135, 49, 137, 39, 157, 32, 145, 29, 97, 33, 77, 40 - } - } -}; - -static const unsigned char small_energy_icdf[3]={2,1,0}; - -static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C) -{ - int c, i; - opus_val32 dist = 0; - c=0; do { - for (i=start;inbEBands]; - oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); -#ifdef FIXED_POINT - f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c]; - /* Rounding to nearest integer here is really important! */ - qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7); - decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT), - SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay))); -#else - f = x-coef*oldE-prev[c]; - /* Rounding to nearest integer here is really important! */ - qi = (int)floor(.5f+f); - decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay; -#endif - /* Prevent the energy from going down too quickly (e.g. for bands - that have just one bin) */ - if (qi < 0 && x < decay_bound) - { - qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT); - if (qi > 0) - qi = 0; - } - qi0 = qi; - /* If we don't have enough bits to encode all the energy, just assume - something safe. */ - tell = ec_tell(enc); - bits_left = budget-tell-3*C*(end-i); - if (i!=start && bits_left < 30) - { - if (bits_left < 24) - qi = IMIN(1, qi); - if (bits_left < 16) - qi = IMAX(-1, qi); - } - if (lfe && i>=2) - qi = IMIN(qi, 0); - if (budget-tell >= 15) - { - int pi; - pi = 2*IMIN(i,20); - ec_laplace_encode(enc, &qi, - prob_model[pi]<<7, prob_model[pi+1]<<6); - } - else if(budget-tell >= 2) - { - qi = IMAX(-1, IMIN(qi, 1)); - ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2); - } - else if(budget-tell >= 1) - { - qi = IMIN(0, qi); - ec_enc_bit_logp(enc, -qi, 1); - } - else - qi = -1; - error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT); - badness += abs(qi0-qi); - q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); - - tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7); -#ifdef FIXED_POINT - tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); -#endif - oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); - prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); - } while (++c < C); - } - return lfe ? 0 : badness; -} - -void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, - const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, - opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes, - int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe) -{ - int intra; - opus_val16 max_decay; - VARDECL(opus_val16, oldEBands_intra); - VARDECL(opus_val16, error_intra); - ec_enc enc_start_state; - opus_uint32 tell; - int badness1=0; - opus_int32 intra_bias; - opus_val32 new_distortion; - SAVE_STACK; - - intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C); - intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512)); - new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C); - - tell = ec_tell(enc); - if (tell+3 > budget) - two_pass = intra = 0; - - max_decay = QCONST16(16.f,DB_SHIFT); - if (end-start>10) - { -#ifdef FIXED_POINT - max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3)); -#else - max_decay = MIN32(max_decay, .125f*nbAvailableBytes); -#endif - } - if (lfe) - max_decay = QCONST16(3.f,DB_SHIFT); - enc_start_state = *enc; - - ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); - ALLOC(error_intra, C*m->nbEBands, opus_val16); - OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands); - - if (two_pass || intra) - { - badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget, - tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe); - } - - if (!intra) - { - unsigned char *intra_buf; - ec_enc enc_intra_state; - opus_int32 tell_intra; - opus_uint32 nstart_bytes; - opus_uint32 nintra_bytes; - opus_uint32 save_bytes; - int badness2; - VARDECL(unsigned char, intra_bits); - - tell_intra = ec_tell_frac(enc); - - enc_intra_state = *enc; - - nstart_bytes = ec_range_bytes(&enc_start_state); - nintra_bytes = ec_range_bytes(&enc_intra_state); - intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes; - save_bytes = nintra_bytes-nstart_bytes; - if (save_bytes == 0) - save_bytes = ALLOC_NONE; - ALLOC(intra_bits, save_bytes, unsigned char); - /* Copy bits from intra bit-stream */ - OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes); - - *enc = enc_start_state; - - badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget, - tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe); - - if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra))) - { - *enc = enc_intra_state; - /* Copy intra bits to bit-stream */ - OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes); - OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); - OPUS_COPY(error, error_intra, C*m->nbEBands); - intra = 1; - } - } else { - OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); - OPUS_COPY(error, error_intra, C*m->nbEBands); - } - - if (intra) - *delayedIntra = new_distortion; - else - *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra), - new_distortion); - - RESTORE_STACK; -} - -void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C) -{ - int i, c; - - /* Encode finer resolution */ - for (i=start;inbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]); -#else - q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac); -#endif - if (q2 > frac-1) - q2 = frac-1; - if (q2<0) - q2 = 0; - ec_enc_bits(enc, q2, fine_quant[i]); -#ifdef FIXED_POINT - offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT)); -#else - offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f; -#endif - oldEBands[i+c*m->nbEBands] += offset; - error[i+c*m->nbEBands] -= offset; - /*printf ("%f ", error[i] - offset);*/ - } while (++c < C); - } -} - -void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C) -{ - int i, prio, c; - - /* Use up the remaining bits */ - for (prio=0;prio<2;prio++) - { - for (i=start;i=C ;i++) - { - if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio) - continue; - c=0; - do { - int q2; - opus_val16 offset; - q2 = error[i+c*m->nbEBands]<0 ? 0 : 1; - ec_enc_bits(enc, q2, 1); -#ifdef FIXED_POINT - offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1); -#else - offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); -#endif - oldEBands[i+c*m->nbEBands] += offset; - bits_left--; - } while (++c < C); - } - } -} - -void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM) -{ - const unsigned char *prob_model = e_prob_model[LM][intra]; - int i, c; - opus_val32 prev[2] = {0, 0}; - opus_val16 coef; - opus_val16 beta; - opus_int32 budget; - opus_int32 tell; - - if (intra) - { - coef = 0; - beta = beta_intra; - } else { - beta = beta_coef[LM]; - coef = pred_coef[LM]; - } - - budget = dec->storage*8; - - /* Decode at a fixed coarse resolution */ - for (i=start;i=15) - { - int pi; - pi = 2*IMIN(i,20); - qi = ec_laplace_decode(dec, - prob_model[pi]<<7, prob_model[pi+1]<<6); - } - else if(budget-tell>=2) - { - qi = ec_dec_icdf(dec, small_energy_icdf, 2); - qi = (qi>>1)^-(qi&1); - } - else if(budget-tell>=1) - { - qi = -ec_dec_bit_logp(dec, 1); - } - else - qi = -1; - q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); - - oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); - tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7); -#ifdef FIXED_POINT - tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); -#endif - oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); - prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); - } while (++c < C); - } -} - -void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C) -{ - int i, c; - /* Decode finer resolution */ - for (i=start;inbEBands] += offset; - } while (++c < C); - } -} - -void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C) -{ - int i, prio, c; - - /* Use up the remaining bits */ - for (prio=0;prio<2;prio++) - { - for (i=start;i=C ;i++) - { - if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio) - continue; - c=0; - do { - int q2; - opus_val16 offset; - q2 = ec_dec_bits(dec, 1); -#ifdef FIXED_POINT - offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1); -#else - offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); -#endif - oldEBands[i+c*m->nbEBands] += offset; - bits_left--; - } while (++c < C); - } - } -} - -void amp2Log2(const CELTMode *m, int effEnd, int end, - celt_ener *bandE, opus_val16 *bandLogE, int C) -{ - int c, i; - c=0; - do { - for (i=0;inbEBands] = - celt_log2(SHL32(bandE[i+c*m->nbEBands],2)) - - SHL16((opus_val16)eMeans[i],6); - for (i=effEnd;inbEBands+i] = -QCONST16(14.f,DB_SHIFT); - } while (++c < C); -} diff --git a/thirdparty/opus/celt/quant_bands.h b/thirdparty/opus/celt/quant_bands.h deleted file mode 100644 index 0490bca4b4..0000000000 --- a/thirdparty/opus/celt/quant_bands.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef QUANT_BANDS -#define QUANT_BANDS - -#include "arch.h" -#include "modes.h" -#include "entenc.h" -#include "entdec.h" -#include "mathops.h" - -#ifdef FIXED_POINT -extern const signed char eMeans[25]; -#else -extern const opus_val16 eMeans[25]; -#endif - -void amp2Log2(const CELTMode *m, int effEnd, int end, - celt_ener *bandE, opus_val16 *bandLogE, int C); - -void log2Amp(const CELTMode *m, int start, int end, - celt_ener *eBands, const opus_val16 *oldEBands, int C); - -void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, - const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, - opus_val16 *error, ec_enc *enc, int C, int LM, - int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra, - int two_pass, int loss_rate, int lfe); - -void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C); - -void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C); - -void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM); - -void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C); - -void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C); - -#endif /* QUANT_BANDS */ diff --git a/thirdparty/opus/celt/rate.c b/thirdparty/opus/celt/rate.c deleted file mode 100644 index 7dfa5be8a6..0000000000 --- a/thirdparty/opus/celt/rate.c +++ /dev/null @@ -1,639 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "modes.h" -#include "cwrs.h" -#include "arch.h" -#include "os_support.h" - -#include "entcode.h" -#include "rate.h" - -static const unsigned char LOG2_FRAC_TABLE[24]={ - 0, - 8,13, - 16,19,21,23, - 24,26,27,28,29,30,31,32, - 32,33,34,34,35,36,36,37,37 -}; - -#ifdef CUSTOM_MODES - -/*Determines if V(N,K) fits in a 32-bit unsigned integer. - N and K are themselves limited to 15 bits.*/ -static int fits_in32(int _n, int _k) -{ - static const opus_int16 maxN[15] = { - 32767, 32767, 32767, 1476, 283, 109, 60, 40, - 29, 24, 20, 18, 16, 14, 13}; - static const opus_int16 maxK[15] = { - 32767, 32767, 32767, 32767, 1172, 238, 95, 53, - 36, 27, 22, 18, 16, 15, 13}; - if (_n>=14) - { - if (_k>=14) - return 0; - else - return _n <= maxN[_k]; - } else { - return _k <= maxK[_n]; - } -} - -void compute_pulse_cache(CELTMode *m, int LM) -{ - int C; - int i; - int j; - int curr=0; - int nbEntries=0; - int entryN[100], entryK[100], entryI[100]; - const opus_int16 *eBands = m->eBands; - PulseCache *cache = &m->cache; - opus_int16 *cindex; - unsigned char *bits; - unsigned char *cap; - - cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2)); - cache->index = cindex; - - /* Scan for all unique band sizes */ - for (i=0;i<=LM+1;i++) - { - for (j=0;jnbEBands;j++) - { - int k; - int N = (eBands[j+1]-eBands[j])<>1; - cindex[i*m->nbEBands+j] = -1; - /* Find other bands that have the same size */ - for (k=0;k<=i;k++) - { - int n; - for (n=0;nnbEBands && (k!=i || n>1) - { - cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n]; - break; - } - } - } - if (cache->index[i*m->nbEBands+j] == -1 && N!=0) - { - int K; - entryN[nbEntries] = N; - K = 0; - while (fits_in32(N,get_pulses(K+1)) && KnbEBands+j] = curr; - entryI[nbEntries] = curr; - - curr += K+1; - nbEntries++; - } - } - } - bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr); - cache->bits = bits; - cache->size = curr; - /* Compute the cache for all unique sizes */ - for (i=0;icaps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands); - for (i=0;i<=LM;i++) - { - for (C=1;C<=2;C++) - { - for (j=0;jnbEBands;j++) - { - int N0; - int max_bits; - N0 = m->eBands[j+1]-m->eBands[j]; - /* N=1 bands only have a sign bit and fine bits. */ - if (N0<1 are even, including custom modes.*/ - if (N0 > 2) - { - N0>>=1; - LM0--; - } - /* N0=1 bands can't be split down to N<2. */ - else if (N0 <= 1) - { - LM0=IMIN(i,1); - N0<<=LM0; - } - /* Compute the cost for the lowest-level PVQ of a fully split - band. */ - pcache = bits + cindex[(LM0+1)*m->nbEBands+j]; - max_bits = pcache[pcache[0]]+1; - /* Add in the cost of coding regular splits. */ - N = N0; - for(k=0;klogN[j]+((LM0+k)<>1)-QTHETA_OFFSET; - /* The number of qtheta bits we'll allocate if the remainder - is to be max_bits. - The average measured cost for theta is 0.89701 times qb, - approximated here as 459/512. */ - num=459*(opus_int32)((2*N-1)*offset+max_bits); - den=((opus_int32)(2*N-1)<<9)-459; - qb = IMIN((num+(den>>1))/den, 57); - celt_assert(qb >= 0); - max_bits += qb; - N <<= 1; - } - /* Add in the cost of a stereo split, if necessary. */ - if (C==2) - { - max_bits <<= 1; - offset = ((m->logN[j]+(i<>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET); - ndof = 2*N-1-(N==2); - /* The average measured cost for theta with the step PDF is - 0.95164 times qb, approximated here as 487/512. */ - num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset); - den = ((opus_int32)ndof<<9)-(N==2?512:487); - qb = IMIN((num+(den>>1))/den, (N==2?64:61)); - celt_assert(qb >= 0); - max_bits += qb; - } - /* Add the fine bits we'll use. */ - /* Compensate for the extra DoF in stereo */ - ndof = C*N + ((C==2 && N>2) ? 1 : 0); - /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET - compared to their "fair share" of total/N */ - offset = ((m->logN[j] + (i<>1)-FINE_OFFSET; - /* N=2 is the only point that doesn't match the curve */ - if (N==2) - offset += 1<>2; - /* The number of fine bits we'll allocate if the remainder is - to be max_bits. */ - num = max_bits+ndof*offset; - den = (ndof-1)<>1))/den, MAX_FINE_BITS); - celt_assert(qb >= 0); - max_bits += C*qb<eBands[j+1]-m->eBands[j])<= 0); - celt_assert(max_bits < 256); - *cap++ = (unsigned char)max_bits; - } - } - } -} - -#endif /* CUSTOM_MODES */ - -#define ALLOC_STEPS 6 - -static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start, - const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance, - int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits, - int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) -{ - opus_int32 psum; - int lo, hi; - int i, j; - int logM; - int stereo; - int codedBands=-1; - int alloc_floor; - opus_int32 left, percoeff; - int done; - opus_int32 balance; - SAVE_STACK; - - alloc_floor = C<1; - - logM = LM<>1; - psum = 0; - done = 0; - for (j=end;j-->start;) - { - int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS); - if (tmp >= thresh[j] || done) - { - done = 1; - /* Don't allocate more than we can actually use */ - psum += IMIN(tmp, cap[j]); - } else { - if (tmp >= alloc_floor) - psum += alloc_floor; - } - } - if (psum > total) - hi = mid; - else - lo = mid; - } - psum = 0; - /*printf ("interp bisection gave %d\n", lo);*/ - done = 0; - for (j=end;j-->start;) - { - int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS); - if (tmp < thresh[j] && !done) - { - if (tmp >= alloc_floor) - tmp = alloc_floor; - else - tmp = 0; - } else - done = 1; - /* Don't allocate more than we can actually use */ - tmp = IMIN(tmp, cap[j]); - bits[j] = tmp; - psum += tmp; - } - - /* Decide which bands to skip, working backwards from the end. */ - for (codedBands=end;;codedBands--) - { - int band_width; - int band_bits; - int rem; - j = codedBands-1; - /* Never skip the first band, nor a band that has been boosted by - dynalloc. - In the first case, we'd be coding a bit to signal we're going to waste - all the other bits. - In the second case, we'd be coding a bit to redistribute all the bits - we just signaled should be cocentrated in this band. */ - if (j<=skip_start) - { - /* Give the bit we reserved to end skipping back. */ - total += skip_rsv; - break; - } - /*Figure out how many left-over bits we would be adding to this band. - This can include bits we've stolen back from higher, skipped bands.*/ - left = total-psum; - percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); - left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; - rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); - band_width = m->eBands[codedBands]-m->eBands[j]; - band_bits = (int)(bits[j] + percoeff*band_width + rem); - /*Only code a skip decision if we're above the threshold for this band. - Otherwise it is force-skipped. - This ensures that we have enough bits to code the skip flag.*/ - if (band_bits >= IMAX(thresh[j], alloc_floor+(1< ((j>4 && j<=signalBandwidth)) -#endif - { - ec_enc_bit_logp(ec, 1, 1); - break; - } - ec_enc_bit_logp(ec, 0, 1); - } else if (ec_dec_bit_logp(ec, 1)) { - break; - } - /*We used a bit to skip this band.*/ - psum += 1< 0) - intensity_rsv = LOG2_FRAC_TABLE[j-start]; - psum += intensity_rsv; - if (band_bits >= alloc_floor) - { - /*If we have enough for a fine energy bit per channel, use it.*/ - psum += alloc_floor; - bits[j] = alloc_floor; - } else { - /*Otherwise this band gets nothing at all.*/ - bits[j] = 0; - } - } - - celt_assert(codedBands > start); - /* Code the intensity and dual stereo parameters. */ - if (intensity_rsv > 0) - { - if (encode) - { - *intensity = IMIN(*intensity, codedBands); - ec_enc_uint(ec, *intensity-start, codedBands+1-start); - } - else - *intensity = start+ec_dec_uint(ec, codedBands+1-start); - } - else - *intensity = 0; - if (*intensity <= start) - { - total += dual_stereo_rsv; - dual_stereo_rsv = 0; - } - if (dual_stereo_rsv > 0) - { - if (encode) - ec_enc_bit_logp(ec, *dual_stereo, 1); - else - *dual_stereo = ec_dec_bit_logp(ec, 1); - } - else - *dual_stereo = 0; - - /* Allocate the remaining bits */ - left = total-psum; - percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); - left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; - for (j=start;jeBands[j+1]-m->eBands[j])); - for (j=start;jeBands[j+1]-m->eBands[j]); - bits[j] += tmp; - left -= tmp; - } - /*for (j=0;j= 0); - N0 = m->eBands[j+1]-m->eBands[j]; - N=N0<1) - { - excess = MAX32(bit-cap[j],0); - bits[j] = bit-excess; - - /* Compensate for the extra DoF in stereo */ - den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 1 : 0)); - - NClogN = den*(m->logN[j] + logM); - - /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET - compared to their "fair share" of total/N */ - offset = (NClogN>>1)-den*FINE_OFFSET; - - /* N=2 is the only point that doesn't match the curve */ - if (N==2) - offset += den<>2; - - /* Changing the offset for allocating the second and third - fine energy bit */ - if (bits[j] + offset < den*2<>2; - else if (bits[j] + offset < den*3<>3; - - /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1)))); - ebits[j] = celt_udiv(ebits[j], den)>>BITRES; - - /* Make sure not to bust */ - if (C*ebits[j] > (bits[j]>>BITRES)) - ebits[j] = bits[j] >> stereo >> BITRES; - - /* More than that is useless because that's about as far as PVQ can go */ - ebits[j] = IMIN(ebits[j], MAX_FINE_BITS); - - /* If we rounded down or capped this band, make it a candidate for the - final fine energy pass */ - fine_priority[j] = ebits[j]*(den<= bits[j]+offset; - - /* Remove the allocated fine bits; the rest are assigned to PVQ */ - bits[j] -= C*ebits[j]< 0) - { - int extra_fine; - int extra_bits; - extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]); - ebits[j] += extra_fine; - extra_bits = extra_fine*C<= excess-balance; - excess -= extra_bits; - } - balance = excess; - - celt_assert(bits[j] >= 0); - celt_assert(ebits[j] >= 0); - } - /* Save any remaining bits over the cap for the rebalancing in - quant_all_bands(). */ - *_balance = balance; - - /* The skipped bands use all their bits for fine energy. */ - for (;j> stereo >> BITRES; - celt_assert(C*ebits[j]<nbEBands; - skip_start = start; - /* Reserve a bit to signal the end of manually skipped bands. */ - skip_rsv = total >= 1<total) - intensity_rsv = 0; - else - { - total -= intensity_rsv; - dual_stereo_rsv = total>=1<eBands[j+1]-m->eBands[j])<>4); - /* Tilt of the allocation curve */ - trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1) - *(1<<(LM+BITRES))>>6; - /* Giving less resolution to single-coefficient bands because they get - more benefit from having one coarse value per coefficient*/ - if ((m->eBands[j+1]-m->eBands[j])<nbAllocVectors - 1; - do - { - int done = 0; - int psum = 0; - int mid = (lo+hi) >> 1; - for (j=end;j-->start;) - { - int bitsj; - int N = m->eBands[j+1]-m->eBands[j]; - bitsj = C*N*m->allocVectors[mid*len+j]<>2; - if (bitsj > 0) - bitsj = IMAX(0, bitsj + trim_offset[j]); - bitsj += offsets[j]; - if (bitsj >= thresh[j] || done) - { - done = 1; - /* Don't allocate more than we can actually use */ - psum += IMIN(bitsj, cap[j]); - } else { - if (bitsj >= C< total) - hi = mid - 1; - else - lo = mid + 1; - /*printf ("lo = %d, hi = %d\n", lo, hi);*/ - } - while (lo <= hi); - hi = lo--; - /*printf ("interp between %d and %d\n", lo, hi);*/ - for (j=start;jeBands[j+1]-m->eBands[j]; - bits1j = C*N*m->allocVectors[lo*len+j]<>2; - bits2j = hi>=m->nbAllocVectors ? - cap[j] : C*N*m->allocVectors[hi*len+j]<>2; - if (bits1j > 0) - bits1j = IMAX(0, bits1j + trim_offset[j]); - if (bits2j > 0) - bits2j = IMAX(0, bits2j + trim_offset[j]); - if (lo > 0) - bits1j += offsets[j]; - bits2j += offsets[j]; - if (offsets[j]>0) - skip_start = j; - bits2j = IMAX(0,bits2j-bits1j); - bits1[j] = bits1j; - bits2[j] = bits2j; - } - codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap, - total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv, - pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth); - RESTORE_STACK; - return codedBands; -} - diff --git a/thirdparty/opus/celt/rate.h b/thirdparty/opus/celt/rate.h deleted file mode 100644 index 515f7687ce..0000000000 --- a/thirdparty/opus/celt/rate.h +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef RATE_H -#define RATE_H - -#define MAX_PSEUDO 40 -#define LOG_MAX_PSEUDO 6 - -#define CELT_MAX_PULSES 128 - -#define MAX_FINE_BITS 8 - -#define FINE_OFFSET 21 -#define QTHETA_OFFSET 4 -#define QTHETA_OFFSET_TWOPHASE 16 - -#include "cwrs.h" -#include "modes.h" - -void compute_pulse_cache(CELTMode *m, int LM); - -static OPUS_INLINE int get_pulses(int i) -{ - return i<8 ? i : (8 + (i&7)) << ((i>>3)-1); -} - -static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits) -{ - int i; - int lo, hi; - const unsigned char *cache; - - LM++; - cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; - - lo = 0; - hi = cache[0]; - bits--; - for (i=0;i>1; - /* OPT: Make sure this is implemented with a conditional move */ - if ((int)cache[mid] >= bits) - hi = mid; - else - lo = mid; - } - if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits) - return lo; - else - return hi; -} - -static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses) -{ - const unsigned char *cache; - - LM++; - cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; - return pulses == 0 ? 0 : cache[pulses]+1; -} - -/** Compute the pulse allocation, i.e. how many pulses will go in each - * band. - @param m mode - @param offsets Requested increase or decrease in the number of bits for - each band - @param total Number of bands - @param pulses Number of pulses per band (returned) - @return Total number of bits allocated -*/ -int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero, - opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth); - -#endif diff --git a/thirdparty/opus/celt/stack_alloc.h b/thirdparty/opus/celt/stack_alloc.h deleted file mode 100644 index 2b51c8d80c..0000000000 --- a/thirdparty/opus/celt/stack_alloc.h +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright (C) 2002-2003 Jean-Marc Valin - Copyright (C) 2007-2009 Xiph.Org Foundation */ -/** - @file stack_alloc.h - @brief Temporary memory allocation on stack -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef STACK_ALLOC_H -#define STACK_ALLOC_H - -#include "opus_types.h" -#include "opus_defines.h" - -#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK)) -#error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode." -#endif - -#ifdef USE_ALLOCA -# ifdef WIN32 -# include -# else -# ifdef HAVE_ALLOCA_H -# include -# else -# include -# endif -# endif -#endif - -/** - * @def ALIGN(stack, size) - * - * Aligns the stack to a 'size' boundary - * - * @param stack Stack - * @param size New size boundary - */ - -/** - * @def PUSH(stack, size, type) - * - * Allocates 'size' elements of type 'type' on the stack - * - * @param stack Stack - * @param size Number of elements - * @param type Type of element - */ - -/** - * @def VARDECL(var) - * - * Declare variable on stack - * - * @param var Variable to declare - */ - -/** - * @def ALLOC(var, size, type) - * - * Allocate 'size' elements of 'type' on stack - * - * @param var Name of variable to allocate - * @param size Number of elements - * @param type Type of element - */ - -#if defined(VAR_ARRAYS) - -#define VARDECL(type, var) -#define ALLOC(var, size, type) type var[size] -#define SAVE_STACK -#define RESTORE_STACK -#define ALLOC_STACK -/* C99 does not allow VLAs of size zero */ -#define ALLOC_NONE 1 - -#elif defined(USE_ALLOCA) - -#define VARDECL(type, var) type *var - -# ifdef WIN32 -# define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size))) -# else -# define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size))) -# endif - -#define SAVE_STACK -#define RESTORE_STACK -#define ALLOC_STACK -#define ALLOC_NONE 0 - -#else - -#ifdef CELT_C -char *scratch_ptr=0; -char *global_stack=0; -#else -extern char *global_stack; -extern char *scratch_ptr; -#endif /* CELT_C */ - -#ifdef ENABLE_VALGRIND - -#include - -#ifdef CELT_C -char *global_stack_top=0; -#else -extern char *global_stack_top; -#endif /* CELT_C */ - -#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) -#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char)))) -#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)) -#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? ((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack; - -#else - -#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) -#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) -#if 0 /* Set this to 1 to instrument pseudostack usage */ -#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) -#else -#define RESTORE_STACK (global_stack = _saved_stack) -#endif -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; - -#endif /* ENABLE_VALGRIND */ - -#include "os_support.h" -#define VARDECL(type, var) type *var -#define ALLOC(var, size, type) var = PUSH(global_stack, size, type) -#define SAVE_STACK char *_saved_stack = global_stack; -#define ALLOC_NONE 0 - -#endif /* VAR_ARRAYS */ - - -#ifdef ENABLE_VALGRIND - -#include -#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) -#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) -#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) -#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) -#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0) -#define OPUS_FPRINTF fprintf - -#else - -static OPUS_INLINE int _opus_false(void) {return 0;} -#define OPUS_CHECK_ARRAY(ptr, len) _opus_false() -#define OPUS_CHECK_VALUE(value) _opus_false() -#define OPUS_PRINT_INT(value) do{}while(0) -#define OPUS_FPRINTF (void) - -#endif - - -#endif /* STACK_ALLOC_H */ diff --git a/thirdparty/opus/celt/static_modes_fixed.h b/thirdparty/opus/celt/static_modes_fixed.h deleted file mode 100644 index 8717d626cb..0000000000 --- a/thirdparty/opus/celt/static_modes_fixed.h +++ /dev/null @@ -1,892 +0,0 @@ -/* The contents of this file was automatically generated by dump_modes.c - with arguments: 48000 960 - It contains static definitions for some pre-defined modes. */ -#include "modes.h" -#include "rate.h" - -#ifdef HAVE_ARM_NE10 -#define OVERRIDE_FFT 1 -#include "static_modes_fixed_arm_ne10.h" -#endif - -#ifndef DEF_WINDOW120 -#define DEF_WINDOW120 -static const opus_val16 window120[120] = { -2, 20, 55, 108, 178, -266, 372, 494, 635, 792, -966, 1157, 1365, 1590, 1831, -2089, 2362, 2651, 2956, 3276, -3611, 3961, 4325, 4703, 5094, -5499, 5916, 6346, 6788, 7241, -7705, 8179, 8663, 9156, 9657, -10167, 10684, 11207, 11736, 12271, -12810, 13353, 13899, 14447, 14997, -15547, 16098, 16648, 17197, 17744, -18287, 18827, 19363, 19893, 20418, -20936, 21447, 21950, 22445, 22931, -23407, 23874, 24330, 24774, 25208, -25629, 26039, 26435, 26819, 27190, -27548, 27893, 28224, 28541, 28845, -29135, 29411, 29674, 29924, 30160, -30384, 30594, 30792, 30977, 31151, -31313, 31463, 31602, 31731, 31849, -31958, 32057, 32148, 32229, 32303, -32370, 32429, 32481, 32528, 32568, -32604, 32634, 32661, 32683, 32701, -32717, 32729, 32740, 32748, 32754, -32758, 32762, 32764, 32766, 32767, -32767, 32767, 32767, 32767, 32767, -}; -#endif - -#ifndef DEF_LOGN400 -#define DEF_LOGN400 -static const opus_int16 logN400[21] = { -0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; -#endif - -#ifndef DEF_PULSE_CACHE50 -#define DEF_PULSE_CACHE50 -static const opus_int16 cache_index50[105] = { --1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, -82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, -41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, -41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305, -318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240, -305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, -240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, -}; -static const unsigned char cache_bits50[392] = { -40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, -31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50, -51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65, -66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61, -64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92, -94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123, -124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, -97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, -142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35, -28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149, -153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225, -229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157, -166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63, -86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250, -25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180, -185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89, -110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41, -74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138, -163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214, -228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49, -90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47, -87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57, -106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, -224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127, -182, 234, }; -static const unsigned char cache_caps50[168] = { -224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, -178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, -240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, -160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172, -138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207, -204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185, -185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39, -207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201, -188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193, -193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204, -204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175, -140, 66, 40, }; -#endif - -#ifndef FFT_TWIDDLES48000_960 -#define FFT_TWIDDLES48000_960 -static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { -{32767, 0}, {32766, -429}, -{32757, -858}, {32743, -1287}, -{32724, -1715}, {32698, -2143}, -{32667, -2570}, {32631, -2998}, -{32588, -3425}, {32541, -3851}, -{32488, -4277}, {32429, -4701}, -{32364, -5125}, {32295, -5548}, -{32219, -5971}, {32138, -6393}, -{32051, -6813}, {31960, -7231}, -{31863, -7650}, {31760, -8067}, -{31652, -8481}, {31539, -8895}, -{31419, -9306}, {31294, -9716}, -{31165, -10126}, {31030, -10532}, -{30889, -10937}, {30743, -11340}, -{30592, -11741}, {30436, -12141}, -{30274, -12540}, {30107, -12935}, -{29936, -13328}, {29758, -13718}, -{29577, -14107}, {29390, -14493}, -{29197, -14875}, {29000, -15257}, -{28797, -15635}, {28590, -16010}, -{28379, -16384}, {28162, -16753}, -{27940, -17119}, {27714, -17484}, -{27482, -17845}, {27246, -18205}, -{27006, -18560}, {26760, -18911}, -{26510, -19260}, {26257, -19606}, -{25997, -19947}, {25734, -20286}, -{25466, -20621}, {25194, -20952}, -{24918, -21281}, {24637, -21605}, -{24353, -21926}, {24063, -22242}, -{23770, -22555}, {23473, -22865}, -{23171, -23171}, {22866, -23472}, -{22557, -23769}, {22244, -24063}, -{21927, -24352}, {21606, -24636}, -{21282, -24917}, {20954, -25194}, -{20622, -25465}, {20288, -25733}, -{19949, -25997}, {19607, -26255}, -{19261, -26509}, {18914, -26760}, -{18561, -27004}, {18205, -27246}, -{17846, -27481}, {17485, -27713}, -{17122, -27940}, {16755, -28162}, -{16385, -28378}, {16012, -28590}, -{15636, -28797}, {15258, -28999}, -{14878, -29197}, {14494, -29389}, -{14108, -29576}, {13720, -29757}, -{13329, -29934}, {12937, -30107}, -{12540, -30274}, {12142, -30435}, -{11744, -30592}, {11342, -30743}, -{10939, -30889}, {10534, -31030}, -{10127, -31164}, {9718, -31294}, -{9307, -31418}, {8895, -31537}, -{8482, -31652}, {8067, -31759}, -{7650, -31862}, {7233, -31960}, -{6815, -32051}, {6393, -32138}, -{5973, -32219}, {5549, -32294}, -{5127, -32364}, {4703, -32429}, -{4278, -32487}, {3852, -32541}, -{3426, -32588}, {2999, -32630}, -{2572, -32667}, {2144, -32698}, -{1716, -32724}, {1287, -32742}, -{860, -32757}, {430, -32766}, -{0, -32767}, {-429, -32766}, -{-858, -32757}, {-1287, -32743}, -{-1715, -32724}, {-2143, -32698}, -{-2570, -32667}, {-2998, -32631}, -{-3425, -32588}, {-3851, -32541}, -{-4277, -32488}, {-4701, -32429}, -{-5125, -32364}, {-5548, -32295}, -{-5971, -32219}, {-6393, -32138}, -{-6813, -32051}, {-7231, -31960}, -{-7650, -31863}, {-8067, -31760}, -{-8481, -31652}, {-8895, -31539}, -{-9306, -31419}, {-9716, -31294}, -{-10126, -31165}, {-10532, -31030}, -{-10937, -30889}, {-11340, -30743}, -{-11741, -30592}, {-12141, -30436}, -{-12540, -30274}, {-12935, -30107}, -{-13328, -29936}, {-13718, -29758}, -{-14107, -29577}, {-14493, -29390}, -{-14875, -29197}, {-15257, -29000}, -{-15635, -28797}, {-16010, -28590}, -{-16384, -28379}, {-16753, -28162}, -{-17119, -27940}, {-17484, -27714}, -{-17845, -27482}, {-18205, -27246}, -{-18560, -27006}, {-18911, -26760}, -{-19260, -26510}, {-19606, -26257}, -{-19947, -25997}, {-20286, -25734}, -{-20621, -25466}, {-20952, -25194}, -{-21281, -24918}, {-21605, -24637}, -{-21926, -24353}, {-22242, -24063}, -{-22555, -23770}, {-22865, -23473}, -{-23171, -23171}, {-23472, -22866}, -{-23769, -22557}, {-24063, -22244}, -{-24352, -21927}, {-24636, -21606}, -{-24917, -21282}, {-25194, -20954}, -{-25465, -20622}, {-25733, -20288}, -{-25997, -19949}, {-26255, -19607}, -{-26509, -19261}, {-26760, -18914}, -{-27004, -18561}, {-27246, -18205}, -{-27481, -17846}, {-27713, -17485}, -{-27940, -17122}, {-28162, -16755}, -{-28378, -16385}, {-28590, -16012}, -{-28797, -15636}, {-28999, -15258}, -{-29197, -14878}, {-29389, -14494}, -{-29576, -14108}, {-29757, -13720}, -{-29934, -13329}, {-30107, -12937}, -{-30274, -12540}, {-30435, -12142}, -{-30592, -11744}, {-30743, -11342}, -{-30889, -10939}, {-31030, -10534}, -{-31164, -10127}, {-31294, -9718}, -{-31418, -9307}, {-31537, -8895}, -{-31652, -8482}, {-31759, -8067}, -{-31862, -7650}, {-31960, -7233}, -{-32051, -6815}, {-32138, -6393}, -{-32219, -5973}, {-32294, -5549}, -{-32364, -5127}, {-32429, -4703}, -{-32487, -4278}, {-32541, -3852}, -{-32588, -3426}, {-32630, -2999}, -{-32667, -2572}, {-32698, -2144}, -{-32724, -1716}, {-32742, -1287}, -{-32757, -860}, {-32766, -430}, -{-32767, 0}, {-32766, 429}, -{-32757, 858}, {-32743, 1287}, -{-32724, 1715}, {-32698, 2143}, -{-32667, 2570}, {-32631, 2998}, -{-32588, 3425}, {-32541, 3851}, -{-32488, 4277}, {-32429, 4701}, -{-32364, 5125}, {-32295, 5548}, -{-32219, 5971}, {-32138, 6393}, -{-32051, 6813}, {-31960, 7231}, -{-31863, 7650}, {-31760, 8067}, -{-31652, 8481}, {-31539, 8895}, -{-31419, 9306}, {-31294, 9716}, -{-31165, 10126}, {-31030, 10532}, -{-30889, 10937}, {-30743, 11340}, -{-30592, 11741}, {-30436, 12141}, -{-30274, 12540}, {-30107, 12935}, -{-29936, 13328}, {-29758, 13718}, -{-29577, 14107}, {-29390, 14493}, -{-29197, 14875}, {-29000, 15257}, -{-28797, 15635}, {-28590, 16010}, -{-28379, 16384}, {-28162, 16753}, -{-27940, 17119}, {-27714, 17484}, -{-27482, 17845}, {-27246, 18205}, -{-27006, 18560}, {-26760, 18911}, -{-26510, 19260}, {-26257, 19606}, -{-25997, 19947}, {-25734, 20286}, -{-25466, 20621}, {-25194, 20952}, -{-24918, 21281}, {-24637, 21605}, -{-24353, 21926}, {-24063, 22242}, -{-23770, 22555}, {-23473, 22865}, -{-23171, 23171}, {-22866, 23472}, -{-22557, 23769}, {-22244, 24063}, -{-21927, 24352}, {-21606, 24636}, -{-21282, 24917}, {-20954, 25194}, -{-20622, 25465}, {-20288, 25733}, -{-19949, 25997}, {-19607, 26255}, -{-19261, 26509}, {-18914, 26760}, -{-18561, 27004}, {-18205, 27246}, -{-17846, 27481}, {-17485, 27713}, -{-17122, 27940}, {-16755, 28162}, -{-16385, 28378}, {-16012, 28590}, -{-15636, 28797}, {-15258, 28999}, -{-14878, 29197}, {-14494, 29389}, -{-14108, 29576}, {-13720, 29757}, -{-13329, 29934}, {-12937, 30107}, -{-12540, 30274}, {-12142, 30435}, -{-11744, 30592}, {-11342, 30743}, -{-10939, 30889}, {-10534, 31030}, -{-10127, 31164}, {-9718, 31294}, -{-9307, 31418}, {-8895, 31537}, -{-8482, 31652}, {-8067, 31759}, -{-7650, 31862}, {-7233, 31960}, -{-6815, 32051}, {-6393, 32138}, -{-5973, 32219}, {-5549, 32294}, -{-5127, 32364}, {-4703, 32429}, -{-4278, 32487}, {-3852, 32541}, -{-3426, 32588}, {-2999, 32630}, -{-2572, 32667}, {-2144, 32698}, -{-1716, 32724}, {-1287, 32742}, -{-860, 32757}, {-430, 32766}, -{0, 32767}, {429, 32766}, -{858, 32757}, {1287, 32743}, -{1715, 32724}, {2143, 32698}, -{2570, 32667}, {2998, 32631}, -{3425, 32588}, {3851, 32541}, -{4277, 32488}, {4701, 32429}, -{5125, 32364}, {5548, 32295}, -{5971, 32219}, {6393, 32138}, -{6813, 32051}, {7231, 31960}, -{7650, 31863}, {8067, 31760}, -{8481, 31652}, {8895, 31539}, -{9306, 31419}, {9716, 31294}, -{10126, 31165}, {10532, 31030}, -{10937, 30889}, {11340, 30743}, -{11741, 30592}, {12141, 30436}, -{12540, 30274}, {12935, 30107}, -{13328, 29936}, {13718, 29758}, -{14107, 29577}, {14493, 29390}, -{14875, 29197}, {15257, 29000}, -{15635, 28797}, {16010, 28590}, -{16384, 28379}, {16753, 28162}, -{17119, 27940}, {17484, 27714}, -{17845, 27482}, {18205, 27246}, -{18560, 27006}, {18911, 26760}, -{19260, 26510}, {19606, 26257}, -{19947, 25997}, {20286, 25734}, -{20621, 25466}, {20952, 25194}, -{21281, 24918}, {21605, 24637}, -{21926, 24353}, {22242, 24063}, -{22555, 23770}, {22865, 23473}, -{23171, 23171}, {23472, 22866}, -{23769, 22557}, {24063, 22244}, -{24352, 21927}, {24636, 21606}, -{24917, 21282}, {25194, 20954}, -{25465, 20622}, {25733, 20288}, -{25997, 19949}, {26255, 19607}, -{26509, 19261}, {26760, 18914}, -{27004, 18561}, {27246, 18205}, -{27481, 17846}, {27713, 17485}, -{27940, 17122}, {28162, 16755}, -{28378, 16385}, {28590, 16012}, -{28797, 15636}, {28999, 15258}, -{29197, 14878}, {29389, 14494}, -{29576, 14108}, {29757, 13720}, -{29934, 13329}, {30107, 12937}, -{30274, 12540}, {30435, 12142}, -{30592, 11744}, {30743, 11342}, -{30889, 10939}, {31030, 10534}, -{31164, 10127}, {31294, 9718}, -{31418, 9307}, {31537, 8895}, -{31652, 8482}, {31759, 8067}, -{31862, 7650}, {31960, 7233}, -{32051, 6815}, {32138, 6393}, -{32219, 5973}, {32294, 5549}, -{32364, 5127}, {32429, 4703}, -{32487, 4278}, {32541, 3852}, -{32588, 3426}, {32630, 2999}, -{32667, 2572}, {32698, 2144}, -{32724, 1716}, {32742, 1287}, -{32757, 860}, {32766, 430}, -}; -#ifndef FFT_BITREV480 -#define FFT_BITREV480 -static const opus_int16 fft_bitrev480[480] = { -0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, -8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, -16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, -24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, -4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, -12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, -20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, -28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, -1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, -9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, -17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, -25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, -5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, -13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, -21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, -29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, -2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, -10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, -18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, -26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, -6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, -14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, -22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, -30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, -3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, -11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, -19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, -27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, -7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, -15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, -23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, -31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, -}; -#endif - -#ifndef FFT_BITREV240 -#define FFT_BITREV240 -static const opus_int16 fft_bitrev240[240] = { -0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, -4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, -8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, -12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, -1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, -5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, -9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, -13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, -2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, -6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, -10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, -14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, -3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, -7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, -11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, -15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, -}; -#endif - -#ifndef FFT_BITREV120 -#define FFT_BITREV120 -static const opus_int16 fft_bitrev120[120] = { -0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, -4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, -1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, -5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, -2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, -6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, -3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, -7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, -}; -#endif - -#ifndef FFT_BITREV60 -#define FFT_BITREV60 -static const opus_int16 fft_bitrev60[60] = { -0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, -1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, -2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, -3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, -}; -#endif - -#ifndef FFT_STATE48000_960_0 -#define FFT_STATE48000_960_0 -static const kiss_fft_state fft_state48000_960_0 = { -480, /* nfft */ -17476, /* scale */ -8, /* scale_shift */ --1, /* shift */ -{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev480, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_480, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_1 -#define FFT_STATE48000_960_1 -static const kiss_fft_state fft_state48000_960_1 = { -240, /* nfft */ -17476, /* scale */ -7, /* scale_shift */ -1, /* shift */ -{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev240, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_240, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_2 -#define FFT_STATE48000_960_2 -static const kiss_fft_state fft_state48000_960_2 = { -120, /* nfft */ -17476, /* scale */ -6, /* scale_shift */ -2, /* shift */ -{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev120, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_120, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_3 -#define FFT_STATE48000_960_3 -static const kiss_fft_state fft_state48000_960_3 = { -60, /* nfft */ -17476, /* scale */ -5, /* scale_shift */ -3, /* shift */ -{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev60, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_60, -#else -NULL, -#endif -}; -#endif - -#endif - -#ifndef MDCT_TWIDDLES960 -#define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[1800] = { -32767, 32767, 32767, 32766, 32765, -32763, 32761, 32759, 32756, 32753, -32750, 32746, 32742, 32738, 32733, -32728, 32722, 32717, 32710, 32704, -32697, 32690, 32682, 32674, 32666, -32657, 32648, 32639, 32629, 32619, -32609, 32598, 32587, 32576, 32564, -32552, 32539, 32526, 32513, 32500, -32486, 32472, 32457, 32442, 32427, -32411, 32395, 32379, 32362, 32345, -32328, 32310, 32292, 32274, 32255, -32236, 32217, 32197, 32177, 32157, -32136, 32115, 32093, 32071, 32049, -32027, 32004, 31981, 31957, 31933, -31909, 31884, 31859, 31834, 31809, -31783, 31756, 31730, 31703, 31676, -31648, 31620, 31592, 31563, 31534, -31505, 31475, 31445, 31415, 31384, -31353, 31322, 31290, 31258, 31226, -31193, 31160, 31127, 31093, 31059, -31025, 30990, 30955, 30920, 30884, -30848, 30812, 30775, 30738, 30701, -30663, 30625, 30587, 30548, 30509, -30470, 30430, 30390, 30350, 30309, -30269, 30227, 30186, 30144, 30102, -30059, 30016, 29973, 29930, 29886, -29842, 29797, 29752, 29707, 29662, -29616, 29570, 29524, 29477, 29430, -29383, 29335, 29287, 29239, 29190, -29142, 29092, 29043, 28993, 28943, -28892, 28842, 28791, 28739, 28688, -28636, 28583, 28531, 28478, 28425, -28371, 28317, 28263, 28209, 28154, -28099, 28044, 27988, 27932, 27876, -27820, 27763, 27706, 27648, 27591, -27533, 27474, 27416, 27357, 27298, -27238, 27178, 27118, 27058, 26997, -26936, 26875, 26814, 26752, 26690, -26628, 26565, 26502, 26439, 26375, -26312, 26247, 26183, 26119, 26054, -25988, 25923, 25857, 25791, 25725, -25658, 25592, 25524, 25457, 25389, -25322, 25253, 25185, 25116, 25047, -24978, 24908, 24838, 24768, 24698, -24627, 24557, 24485, 24414, 24342, -24270, 24198, 24126, 24053, 23980, -23907, 23834, 23760, 23686, 23612, -23537, 23462, 23387, 23312, 23237, -23161, 23085, 23009, 22932, 22856, -22779, 22701, 22624, 22546, 22468, -22390, 22312, 22233, 22154, 22075, -21996, 21916, 21836, 21756, 21676, -21595, 21515, 21434, 21352, 21271, -21189, 21107, 21025, 20943, 20860, -20777, 20694, 20611, 20528, 20444, -20360, 20276, 20192, 20107, 20022, -19937, 19852, 19767, 19681, 19595, -19509, 19423, 19336, 19250, 19163, -19076, 18988, 18901, 18813, 18725, -18637, 18549, 18460, 18372, 18283, -18194, 18104, 18015, 17925, 17835, -17745, 17655, 17565, 17474, 17383, -17292, 17201, 17110, 17018, 16927, -16835, 16743, 16650, 16558, 16465, -16372, 16279, 16186, 16093, 15999, -15906, 15812, 15718, 15624, 15529, -15435, 15340, 15245, 15150, 15055, -14960, 14864, 14769, 14673, 14577, -14481, 14385, 14288, 14192, 14095, -13998, 13901, 13804, 13706, 13609, -13511, 13414, 13316, 13218, 13119, -13021, 12923, 12824, 12725, 12626, -12527, 12428, 12329, 12230, 12130, -12030, 11930, 11831, 11730, 11630, -11530, 11430, 11329, 11228, 11128, -11027, 10926, 10824, 10723, 10622, -10520, 10419, 10317, 10215, 10113, -10011, 9909, 9807, 9704, 9602, -9499, 9397, 9294, 9191, 9088, -8985, 8882, 8778, 8675, 8572, -8468, 8364, 8261, 8157, 8053, -7949, 7845, 7741, 7637, 7532, -7428, 7323, 7219, 7114, 7009, -6905, 6800, 6695, 6590, 6485, -6380, 6274, 6169, 6064, 5958, -5853, 5747, 5642, 5536, 5430, -5325, 5219, 5113, 5007, 4901, -4795, 4689, 4583, 4476, 4370, -4264, 4157, 4051, 3945, 3838, -3732, 3625, 3518, 3412, 3305, -3198, 3092, 2985, 2878, 2771, -2664, 2558, 2451, 2344, 2237, -2130, 2023, 1916, 1809, 1702, -1594, 1487, 1380, 1273, 1166, -1059, 952, 844, 737, 630, -523, 416, 308, 201, 94, --13, -121, -228, -335, -442, --550, -657, -764, -871, -978, --1086, -1193, -1300, -1407, -1514, --1621, -1728, -1835, -1942, -2049, --2157, -2263, -2370, -2477, -2584, --2691, -2798, -2905, -3012, -3118, --3225, -3332, -3439, -3545, -3652, --3758, -3865, -3971, -4078, -4184, --4290, -4397, -4503, -4609, -4715, --4821, -4927, -5033, -5139, -5245, --5351, -5457, -5562, -5668, -5774, --5879, -5985, -6090, -6195, -6301, --6406, -6511, -6616, -6721, -6826, --6931, -7036, -7140, -7245, -7349, --7454, -7558, -7663, -7767, -7871, --7975, -8079, -8183, -8287, -8390, --8494, -8597, -8701, -8804, -8907, --9011, -9114, -9217, -9319, -9422, --9525, -9627, -9730, -9832, -9934, --10037, -10139, -10241, -10342, -10444, --10546, -10647, -10748, -10850, -10951, --11052, -11153, -11253, -11354, -11455, --11555, -11655, -11756, -11856, -11955, --12055, -12155, -12254, -12354, -12453, --12552, -12651, -12750, -12849, -12947, --13046, -13144, -13242, -13340, -13438, --13536, -13633, -13731, -13828, -13925, --14022, -14119, -14216, -14312, -14409, --14505, -14601, -14697, -14793, -14888, --14984, -15079, -15174, -15269, -15364, --15459, -15553, -15647, -15741, -15835, --15929, -16023, -16116, -16210, -16303, --16396, -16488, -16581, -16673, -16766, --16858, -16949, -17041, -17133, -17224, --17315, -17406, -17497, -17587, -17678, --17768, -17858, -17948, -18037, -18127, --18216, -18305, -18394, -18483, -18571, --18659, -18747, -18835, -18923, -19010, --19098, -19185, -19271, -19358, -19444, --19531, -19617, -19702, -19788, -19873, --19959, -20043, -20128, -20213, -20297, --20381, -20465, -20549, -20632, -20715, --20798, -20881, -20963, -21046, -21128, --21210, -21291, -21373, -21454, -21535, --21616, -21696, -21776, -21856, -21936, --22016, -22095, -22174, -22253, -22331, --22410, -22488, -22566, -22643, -22721, --22798, -22875, -22951, -23028, -23104, --23180, -23256, -23331, -23406, -23481, --23556, -23630, -23704, -23778, -23852, --23925, -23998, -24071, -24144, -24216, --24288, -24360, -24432, -24503, -24574, --24645, -24716, -24786, -24856, -24926, --24995, -25064, -25133, -25202, -25270, --25339, -25406, -25474, -25541, -25608, --25675, -25742, -25808, -25874, -25939, --26005, -26070, -26135, -26199, -26264, --26327, -26391, -26455, -26518, -26581, --26643, -26705, -26767, -26829, -26891, --26952, -27013, -27073, -27133, -27193, --27253, -27312, -27372, -27430, -27489, --27547, -27605, -27663, -27720, -27777, --27834, -27890, -27946, -28002, -28058, --28113, -28168, -28223, -28277, -28331, --28385, -28438, -28491, -28544, -28596, --28649, -28701, -28752, -28803, -28854, --28905, -28955, -29006, -29055, -29105, --29154, -29203, -29251, -29299, -29347, --29395, -29442, -29489, -29535, -29582, --29628, -29673, -29719, -29764, -29808, --29853, -29897, -29941, -29984, -30027, --30070, -30112, -30154, -30196, -30238, --30279, -30320, -30360, -30400, -30440, --30480, -30519, -30558, -30596, -30635, --30672, -30710, -30747, -30784, -30821, --30857, -30893, -30929, -30964, -30999, --31033, -31068, -31102, -31135, -31168, --31201, -31234, -31266, -31298, -31330, --31361, -31392, -31422, -31453, -31483, --31512, -31541, -31570, -31599, -31627, --31655, -31682, -31710, -31737, -31763, --31789, -31815, -31841, -31866, -31891, --31915, -31939, -31963, -31986, -32010, --32032, -32055, -32077, -32099, -32120, --32141, -32162, -32182, -32202, -32222, --32241, -32260, -32279, -32297, -32315, --32333, -32350, -32367, -32383, -32399, --32415, -32431, -32446, -32461, -32475, --32489, -32503, -32517, -32530, -32542, --32555, -32567, -32579, -32590, -32601, --32612, -32622, -32632, -32641, -32651, --32659, -32668, -32676, -32684, -32692, --32699, -32706, -32712, -32718, -32724, --32729, -32734, -32739, -32743, -32747, --32751, -32754, -32757, -32760, -32762, --32764, -32765, -32767, -32767, -32767, -32767, 32767, 32765, 32761, 32756, -32750, 32742, 32732, 32722, 32710, -32696, 32681, 32665, 32647, 32628, -32608, 32586, 32562, 32538, 32512, -32484, 32455, 32425, 32393, 32360, -32326, 32290, 32253, 32214, 32174, -32133, 32090, 32046, 32001, 31954, -31906, 31856, 31805, 31753, 31700, -31645, 31588, 31530, 31471, 31411, -31349, 31286, 31222, 31156, 31089, -31020, 30951, 30880, 30807, 30733, -30658, 30582, 30504, 30425, 30345, -30263, 30181, 30096, 30011, 29924, -29836, 29747, 29656, 29564, 29471, -29377, 29281, 29184, 29086, 28987, -28886, 28784, 28681, 28577, 28471, -28365, 28257, 28147, 28037, 27925, -27812, 27698, 27583, 27467, 27349, -27231, 27111, 26990, 26868, 26744, -26620, 26494, 26367, 26239, 26110, -25980, 25849, 25717, 25583, 25449, -25313, 25176, 25038, 24900, 24760, -24619, 24477, 24333, 24189, 24044, -23898, 23751, 23602, 23453, 23303, -23152, 22999, 22846, 22692, 22537, -22380, 22223, 22065, 21906, 21746, -21585, 21423, 21261, 21097, 20933, -20767, 20601, 20434, 20265, 20096, -19927, 19756, 19584, 19412, 19239, -19065, 18890, 18714, 18538, 18361, -18183, 18004, 17824, 17644, 17463, -17281, 17098, 16915, 16731, 16546, -16361, 16175, 15988, 15800, 15612, -15423, 15234, 15043, 14852, 14661, -14469, 14276, 14083, 13889, 13694, -13499, 13303, 13107, 12910, 12713, -12515, 12317, 12118, 11918, 11718, -11517, 11316, 11115, 10913, 10710, -10508, 10304, 10100, 9896, 9691, -9486, 9281, 9075, 8869, 8662, -8455, 8248, 8040, 7832, 7623, -7415, 7206, 6996, 6787, 6577, -6366, 6156, 5945, 5734, 5523, -5311, 5100, 4888, 4675, 4463, -4251, 4038, 3825, 3612, 3399, -3185, 2972, 2758, 2544, 2330, -2116, 1902, 1688, 1474, 1260, -1045, 831, 617, 402, 188, --27, -241, -456, -670, -885, --1099, -1313, -1528, -1742, -1956, --2170, -2384, -2598, -2811, -3025, --3239, -3452, -3665, -3878, -4091, --4304, -4516, -4728, -4941, -5153, --5364, -5576, -5787, -5998, -6209, --6419, -6629, -6839, -7049, -7258, --7467, -7676, -7884, -8092, -8300, --8507, -8714, -8920, -9127, -9332, --9538, -9743, -9947, -10151, -10355, --10558, -10761, -10963, -11165, -11367, --11568, -11768, -11968, -12167, -12366, --12565, -12762, -12960, -13156, -13352, --13548, -13743, -13937, -14131, -14324, --14517, -14709, -14900, -15091, -15281, --15470, -15659, -15847, -16035, -16221, --16407, -16593, -16777, -16961, -17144, --17326, -17508, -17689, -17869, -18049, --18227, -18405, -18582, -18758, -18934, --19108, -19282, -19455, -19627, -19799, --19969, -20139, -20308, -20475, -20642, --20809, -20974, -21138, -21301, -21464, --21626, -21786, -21946, -22105, -22263, --22420, -22575, -22730, -22884, -23037, --23189, -23340, -23490, -23640, -23788, --23935, -24080, -24225, -24369, -24512, --24654, -24795, -24934, -25073, -25211, --25347, -25482, -25617, -25750, -25882, --26013, -26143, -26272, -26399, -26526, --26651, -26775, -26898, -27020, -27141, --27260, -27379, -27496, -27612, -27727, --27841, -27953, -28065, -28175, -28284, --28391, -28498, -28603, -28707, -28810, --28911, -29012, -29111, -29209, -29305, --29401, -29495, -29587, -29679, -29769, --29858, -29946, -30032, -30118, -30201, --30284, -30365, -30445, -30524, -30601, --30677, -30752, -30825, -30897, -30968, --31038, -31106, -31172, -31238, -31302, --31365, -31426, -31486, -31545, -31602, --31658, -31713, -31766, -31818, -31869, --31918, -31966, -32012, -32058, -32101, --32144, -32185, -32224, -32262, -32299, --32335, -32369, -32401, -32433, -32463, --32491, -32518, -32544, -32568, -32591, --32613, -32633, -32652, -32669, -32685, --32700, -32713, -32724, -32735, -32744, --32751, -32757, -32762, -32766, -32767, -32767, 32764, 32755, 32741, 32720, -32694, 32663, 32626, 32583, 32535, -32481, 32421, 32356, 32286, 32209, -32128, 32041, 31948, 31850, 31747, -31638, 31523, 31403, 31278, 31148, -31012, 30871, 30724, 30572, 30415, -30253, 30086, 29913, 29736, 29553, -29365, 29172, 28974, 28771, 28564, -28351, 28134, 27911, 27684, 27452, -27216, 26975, 26729, 26478, 26223, -25964, 25700, 25432, 25159, 24882, -24601, 24315, 24026, 23732, 23434, -23133, 22827, 22517, 22204, 21886, -21565, 21240, 20912, 20580, 20244, -19905, 19563, 19217, 18868, 18516, -18160, 17802, 17440, 17075, 16708, -16338, 15964, 15588, 15210, 14829, -14445, 14059, 13670, 13279, 12886, -12490, 12093, 11693, 11291, 10888, -10482, 10075, 9666, 9255, 8843, -8429, 8014, 7597, 7180, 6760, -6340, 5919, 5496, 5073, 4649, -4224, 3798, 3372, 2945, 2517, -2090, 1661, 1233, 804, 375, --54, -483, -911, -1340, -1768, --2197, -2624, -3052, -3479, -3905, --4330, -4755, -5179, -5602, -6024, --6445, -6865, -7284, -7702, -8118, --8533, -8946, -9358, -9768, -10177, --10584, -10989, -11392, -11793, -12192, --12589, -12984, -13377, -13767, -14155, --14541, -14924, -15305, -15683, -16058, --16430, -16800, -17167, -17531, -17892, --18249, -18604, -18956, -19304, -19649, --19990, -20329, -20663, -20994, -21322, --21646, -21966, -22282, -22595, -22904, --23208, -23509, -23806, -24099, -24387, --24672, -24952, -25228, -25499, -25766, --26029, -26288, -26541, -26791, -27035, --27275, -27511, -27741, -27967, -28188, --28405, -28616, -28823, -29024, -29221, --29412, -29599, -29780, -29957, -30128, --30294, -30455, -30611, -30761, -30906, --31046, -31181, -31310, -31434, -31552, --31665, -31773, -31875, -31972, -32063, --32149, -32229, -32304, -32373, -32437, --32495, -32547, -32594, -32635, -32671, --32701, -32726, -32745, -32758, -32766, -32767, 32754, 32717, 32658, 32577, -32473, 32348, 32200, 32029, 31837, -31624, 31388, 31131, 30853, 30553, -30232, 29891, 29530, 29148, 28746, -28324, 27883, 27423, 26944, 26447, -25931, 25398, 24847, 24279, 23695, -23095, 22478, 21846, 21199, 20538, -19863, 19174, 18472, 17757, 17030, -16291, 15541, 14781, 14010, 13230, -12441, 11643, 10837, 10024, 9204, -8377, 7545, 6708, 5866, 5020, -4171, 3319, 2464, 1608, 751, --107, -965, -1822, -2678, -3532, --4383, -5232, -6077, -6918, -7754, --8585, -9409, -10228, -11039, -11843, --12639, -13426, -14204, -14972, -15730, --16477, -17213, -17937, -18648, -19347, --20033, -20705, -21363, -22006, -22634, --23246, -23843, -24423, -24986, -25533, --26062, -26573, -27066, -27540, -27995, --28431, -28848, -29245, -29622, -29979, --30315, -30630, -30924, -31197, -31449, --31679, -31887, -32074, -32239, -32381, --32501, -32600, -32675, -32729, -32759, -}; -#endif - -static const CELTMode mode48000_960_120 = { -48000, /* Fs */ -120, /* overlap */ -21, /* nbEBands */ -21, /* effEBands */ -{27853, 0, 4096, 8192, }, /* preemph */ -eband5ms, /* eBands */ -3, /* maxLM */ -8, /* nbShortMdcts */ -120, /* shortMdctSize */ -11, /* nbAllocVectors */ -band_allocation, /* allocVectors */ -logN400, /* logN */ -window120, /* window */ -{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */ -{392, cache_index50, cache_bits50, cache_caps50}, /* cache */ -}; - -/* List of all the available modes */ -#define TOTAL_MODES 1 -static const CELTMode * const static_mode_list[TOTAL_MODES] = { -&mode48000_960_120, -}; diff --git a/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h b/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h deleted file mode 100644 index b8ef0cee98..0000000000 --- a/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h +++ /dev/null @@ -1,388 +0,0 @@ -/* The contents of this file was automatically generated by - * dump_mode_arm_ne10.c with arguments: 48000 960 - * It contains static definitions for some pre-defined modes. */ -#include - -#ifndef NE10_FFT_PARAMS48000_960 -#define NE10_FFT_PARAMS48000_960 -static const ne10_int32_t ne10_factors_480[64] = { -4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_240[64] = { -3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_120[64] = { -3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_60[64] = { -2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, -{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, -{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, -{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, -{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, -{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, -{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, -{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, -{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, -{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, -{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, -{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, -{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, -{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, -{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, -{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, -{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, -{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, -{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, -{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, -{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, -{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, -{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, -{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, -{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570}, -{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154}, -{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172}, -{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068}, -{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822}, -{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146}, -{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682}, -{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231}, -{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791}, -{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029}, -{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313}, -{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783}, -{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661}, -{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541}, -{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450}, -{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914}, -{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194}, -{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807}, -{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067}, -{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658}, -{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677}, -{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704}, -{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277}, -{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512}, -{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510}, -{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682}, -{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424}, -{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524}, -{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195}, -{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164}, -{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771}, -{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961}, -{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705}, -{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540}, -{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994}, -{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540}, -{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646}, -{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814}, -{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593}, -{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667}, -{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, -{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, -{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, -{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, -{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, -{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, -{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, -{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, -{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, -{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, -{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, -{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, -{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, -{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, -{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, -{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, -{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, -{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, -{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, -{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, -{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584}, -{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622}, -{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955}, -{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651}, -{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703}, -{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114}, -{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330}, -{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403}, -{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078}, -{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372}, -{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738}, -{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248}, -{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400}, -{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552}, -{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141}, -{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553}, -{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377}, -{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448}, -{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240}, -{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574}, -{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630}, -{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288}, -{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960}, -{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934}, -{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097}, -{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189}, -{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282}, -{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280}, -{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875}, -{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603}, -{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339}, -{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386}, -{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674}, -{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010}, -{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564}, -{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860}, -{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118}, -{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768}, -{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839}, -{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595}, -{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001}, -{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837}, -{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918}, -{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354}, -{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119}, -{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555}, -{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188}, -{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928}, -{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484}, -{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706}, -{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252}, -{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149}, -{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287}, -{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850}, -{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059}, -{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202}, -{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458}, -{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823}, -{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542}, -{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015}, -}; -static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, -{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, -{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, -{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, -{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, -{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, -{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, -{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, -{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, -{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, -{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, -{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, -{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, -{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, -{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, -{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, -{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, -{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, -{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, -{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, -{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, -{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, -{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, -{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, -{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, -{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, -{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, -{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, -{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, -{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, -{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, -{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, -{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, -{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, -{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, -{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496}, -{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855}, -{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883}, -{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330}, -{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268}, -{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035}, -{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910}, -{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276}, -{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785}, -{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298}, -{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465}, -{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435}, -{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248}, -{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368}, -{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912}, -{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536}, -{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647}, -{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493}, -{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827}, -{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413}, -{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096}, -{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084}, -{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406}, -{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881}, -{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895}, -{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257}, -{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506}, -{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107}, -{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239}, -{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668}, -}; -static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, -{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, -{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, -{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, -{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, -{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, -{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, -{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, -{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, -{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, -{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, -{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, -{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, -{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, -{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, -{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, -{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, -{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, -{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, -{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, -{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, -{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, -{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, -{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, -}; -static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, -{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, -{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, -{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, -{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, -}; -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = { -120, -(ne10_int32_t *)ne10_factors_480, -(ne10_fft_cpx_int32_t *)ne10_twiddles_480, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120], -}; -static const arch_fft_state cfg_arch_480 = { -1, -(void *)&ne10_fft_state_int32_t_480, -}; - -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = { -60, -(ne10_int32_t *)ne10_factors_240, -(ne10_fft_cpx_int32_t *)ne10_twiddles_240, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60], -}; -static const arch_fft_state cfg_arch_240 = { -1, -(void *)&ne10_fft_state_int32_t_240, -}; - -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = { -30, -(ne10_int32_t *)ne10_factors_120, -(ne10_fft_cpx_int32_t *)ne10_twiddles_120, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30], -}; -static const arch_fft_state cfg_arch_120 = { -1, -(void *)&ne10_fft_state_int32_t_120, -}; - -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = { -15, -(ne10_int32_t *)ne10_factors_60, -(ne10_fft_cpx_int32_t *)ne10_twiddles_60, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15], -}; -static const arch_fft_state cfg_arch_60 = { -1, -(void *)&ne10_fft_state_int32_t_60, -}; - -#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/thirdparty/opus/celt/static_modes_float.h b/thirdparty/opus/celt/static_modes_float.h deleted file mode 100644 index e102a38391..0000000000 --- a/thirdparty/opus/celt/static_modes_float.h +++ /dev/null @@ -1,888 +0,0 @@ -/* The contents of this file was automatically generated by dump_modes.c - with arguments: 48000 960 - It contains static definitions for some pre-defined modes. */ -#include "modes.h" -#include "rate.h" - -#ifdef HAVE_ARM_NE10 -#define OVERRIDE_FFT 1 -#include "static_modes_float_arm_ne10.h" -#endif - -#ifndef DEF_WINDOW120 -#define DEF_WINDOW120 -static const opus_val16 window120[120] = { -6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f, -0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, -0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, -0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f, -0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f, -0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f, -0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f, -0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f, -0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f, -0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f, -0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f, -0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f, -0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f, -0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f, -0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f, -0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f, -0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f, -0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f, -0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f, -0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f, -0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, -0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, -0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, -0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f, -}; -#endif - -#ifndef DEF_LOGN400 -#define DEF_LOGN400 -static const opus_int16 logN400[21] = { -0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; -#endif - -#ifndef DEF_PULSE_CACHE50 -#define DEF_PULSE_CACHE50 -static const opus_int16 cache_index50[105] = { --1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, -82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, -41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, -41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305, -318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240, -305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, -240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, -}; -static const unsigned char cache_bits50[392] = { -40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, -31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50, -51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65, -66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61, -64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92, -94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123, -124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, -97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, -142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35, -28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149, -153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225, -229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157, -166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63, -86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250, -25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180, -185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89, -110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41, -74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138, -163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214, -228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49, -90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47, -87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57, -106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, -224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127, -182, 234, }; -static const unsigned char cache_caps50[168] = { -224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, -178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, -240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, -160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172, -138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207, -204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185, -185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39, -207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201, -188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193, -193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204, -204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175, -140, 66, 40, }; -#endif - -#ifndef FFT_TWIDDLES48000_960 -#define FFT_TWIDDLES48000_960 -static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { -{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f}, -{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f}, -{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f}, -{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f}, -{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f}, -{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f}, -{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f}, -{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f}, -{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f}, -{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f}, -{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f}, -{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f}, -{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f}, -{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f}, -{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f}, -{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f}, -{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f}, -{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f}, -{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f}, -{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f}, -{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f}, -{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f}, -{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f}, -{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f}, -{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f}, -{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f}, -{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f}, -{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f}, -{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f}, -{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f}, -{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f}, -{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f}, -{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f}, -{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f}, -{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f}, -{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f}, -{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f}, -{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f}, -{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f}, -{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f}, -{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f}, -{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f}, -{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f}, -{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f}, -{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f}, -{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f}, -{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f}, -{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f}, -{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f}, -{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f}, -{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f}, -{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f}, -{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f}, -{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f}, -{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f}, -{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f}, -{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f}, -{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f}, -{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f}, -{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f}, -{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f}, -{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f}, -{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f}, -{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f}, -{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f}, -{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f}, -{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f}, -{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f}, -{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f}, -{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f}, -{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f}, -{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f}, -{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f}, -{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f}, -{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f}, -{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f}, -{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f}, -{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f}, -{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f}, -{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f}, -{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f}, -{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f}, -{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f}, -{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f}, -{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f}, -{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f}, -{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f}, -{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f}, -{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f}, -{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f}, -{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f}, -{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f}, -{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f}, -{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f}, -{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f}, -{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f}, -{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f}, -{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f}, -{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f}, -{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f}, -{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f}, -{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f}, -{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f}, -{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f}, -{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f}, -{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f}, -{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f}, -{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f}, -{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f}, -{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f}, -{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f}, -{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f}, -{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f}, -{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f}, -{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f}, -{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f}, -{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f}, -{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f}, -{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f}, -{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f}, -{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f}, -{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f}, -{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f}, -{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f}, -{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f}, -{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f}, -{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f}, -{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f}, -{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f}, -{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f}, -{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f}, -{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f}, -{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f}, -{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f}, -{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f}, -{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f}, -{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f}, -{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f}, -{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f}, -{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f}, -{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f}, -{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f}, -{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f}, -{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f}, -{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f}, -{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f}, -{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f}, -{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f}, -{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f}, -{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f}, -{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f}, -{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f}, -{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f}, -{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f}, -{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f}, -{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f}, -{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f}, -{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f}, -{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f}, -{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f}, -{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f}, -{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f}, -{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f}, -{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f}, -{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f}, -{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f}, -{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f}, -{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f}, -{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f}, -{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f}, -{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f}, -{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f}, -{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f}, -{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f}, -{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f}, -{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f}, -{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f}, -{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f}, -{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f}, -{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f}, -{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f}, -{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f}, -{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f}, -{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f}, -{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f}, -{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f}, -{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f}, -{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f}, -{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f}, -{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f}, -{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f}, -{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f}, -{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f}, -{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f}, -{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f}, -{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f}, -{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f}, -{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f}, -{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f}, -{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f}, -{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f}, -{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f}, -{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f}, -{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f}, -{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f}, -{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f}, -{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f}, -{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f}, -{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f}, -{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f}, -{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f}, -{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f}, -{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f}, -{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f}, -{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f}, -{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f}, -{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f}, -{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f}, -{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f}, -{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f}, -{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f}, -{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f}, -{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f}, -{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f}, -{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f}, -{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f}, -{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f}, -{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f}, -{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f}, -{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f}, -{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f}, -{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f}, -{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f}, -{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f}, -{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f}, -{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f}, -{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f}, -{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f}, -{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f}, -{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f}, -}; -#ifndef FFT_BITREV480 -#define FFT_BITREV480 -static const opus_int16 fft_bitrev480[480] = { -0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, -8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, -16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, -24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, -4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, -12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, -20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, -28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, -1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, -9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, -17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, -25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, -5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, -13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, -21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, -29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, -2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, -10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, -18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, -26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, -6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, -14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, -22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, -30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, -3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, -11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, -19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, -27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, -7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, -15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, -23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, -31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, -}; -#endif - -#ifndef FFT_BITREV240 -#define FFT_BITREV240 -static const opus_int16 fft_bitrev240[240] = { -0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, -4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, -8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, -12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, -1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, -5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, -9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, -13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, -2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, -6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, -10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, -14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, -3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, -7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, -11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, -15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, -}; -#endif - -#ifndef FFT_BITREV120 -#define FFT_BITREV120 -static const opus_int16 fft_bitrev120[120] = { -0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, -4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, -1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, -5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, -2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, -6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, -3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, -7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, -}; -#endif - -#ifndef FFT_BITREV60 -#define FFT_BITREV60 -static const opus_int16 fft_bitrev60[60] = { -0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, -1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, -2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, -3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, -}; -#endif - -#ifndef FFT_STATE48000_960_0 -#define FFT_STATE48000_960_0 -static const kiss_fft_state fft_state48000_960_0 = { -480, /* nfft */ -0.002083333f, /* scale */ --1, /* shift */ -{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev480, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_480, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_1 -#define FFT_STATE48000_960_1 -static const kiss_fft_state fft_state48000_960_1 = { -240, /* nfft */ -0.004166667f, /* scale */ -1, /* shift */ -{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev240, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_240, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_2 -#define FFT_STATE48000_960_2 -static const kiss_fft_state fft_state48000_960_2 = { -120, /* nfft */ -0.008333333f, /* scale */ -2, /* shift */ -{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev120, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_120, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_3 -#define FFT_STATE48000_960_3 -static const kiss_fft_state fft_state48000_960_3 = { -60, /* nfft */ -0.016666667f, /* scale */ -3, /* shift */ -{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev60, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_60, -#else -NULL, -#endif -}; -#endif - -#endif - -#ifndef MDCT_TWIDDLES960 -#define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[1800] = { -0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f, -0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f, -0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f, -0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f, -0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f, -0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f, -0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f, -0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f, -0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f, -0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f, -0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f, -0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f, -0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f, -0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f, -0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f, -0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f, -0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f, -0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f, -0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f, -0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f, -0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f, -0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f, -0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f, -0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f, -0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f, -0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f, -0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f, -0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f, -0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f, -0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f, -0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f, -0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f, -0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f, -0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f, -0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f, -0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f, -0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f, -0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f, -0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f, -0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f, -0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f, -0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f, -0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f, -0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f, -0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f, -0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f, -0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f, -0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f, -0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f, -0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f, -0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f, -0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f, -0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f, -0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f, -0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f, -0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f, -0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f, -0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f, -0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f, -0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f, -0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f, -0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f, -0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f, -0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f, -0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f, -0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f, -0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f, -0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f, -0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f, -0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f, -0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f, -0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f, -0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f, -0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f, -0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f, -0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f, -0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f, -0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f, -0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f, -0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f, -0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f, -0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f, -0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f, -0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f, -0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f, -0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f, -0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f, -0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f, -0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f, -0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f, -0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f, -0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f, -0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f, -0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f, -0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f, -0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f, --0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f, --0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f, --0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f, --0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f, --0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f, --0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f, --0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f, --0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f, --0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f, --0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f, --0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f, --0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f, --0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f, --0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f, --0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f, --0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f, --0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f, --0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f, --0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f, --0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f, --0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f, --0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f, --0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f, --0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f, --0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f, --0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f, --0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f, --0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f, --0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f, --0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f, --0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f, --0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f, --0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f, --0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f, --0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f, --0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f, --0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f, --0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f, --0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f, --0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f, --0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f, --0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f, --0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f, --0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f, --0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f, --0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f, --0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f, --0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f, --0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f, --0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f, --0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f, --0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f, --0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f, --0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f, --0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f, --0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f, --0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f, --0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f, --0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f, --0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f, --0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f, --0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f, --0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f, --0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f, --0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f, --0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f, --0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f, --0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f, --0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f, --0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f, --0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f, --0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f, --0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f, --0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f, --0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f, --0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f, --0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f, --0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f, --0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f, --0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f, --0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f, --0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f, --0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f, --0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f, --0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f, --0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f, --0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f, --0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f, --0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f, --0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f, --0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f, --0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f, --0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f, --0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f, --0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f, --0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f, -0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f, -0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f, -0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f, -0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f, -0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f, -0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f, -0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f, -0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f, -0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f, -0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f, -0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f, -0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f, -0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f, -0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f, -0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f, -0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f, -0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f, -0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f, -0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f, -0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f, -0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f, -0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f, -0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f, -0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f, -0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f, -0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f, -0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f, -0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f, -0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f, -0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f, -0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f, -0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f, -0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f, -0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f, -0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f, -0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f, -0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f, -0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f, -0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f, -0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f, -0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f, -0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f, -0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f, -0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f, -0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f, -0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f, -0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f, -0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f, --0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f, --0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f, --0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f, --0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f, --0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f, --0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f, --0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f, --0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f, --0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f, --0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f, --0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f, --0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f, --0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f, --0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f, --0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f, --0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f, --0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f, --0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f, --0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f, --0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f, --0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f, --0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f, --0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f, --0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f, --0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f, --0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f, --0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f, --0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f, --0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f, --0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f, --0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f, --0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f, --0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f, --0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f, --0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f, --0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f, --0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f, --0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f, --0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f, --0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f, --0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f, --0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f, --0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f, --0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f, --0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f, --0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f, --0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f, --0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f, -0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f, -0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f, -0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f, -0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f, -0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f, -0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f, -0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f, -0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f, -0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f, -0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f, -0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f, -0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f, -0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f, -0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f, -0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f, -0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f, -0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f, -0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f, -0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f, -0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f, -0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f, -0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f, -0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f, -0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f, --0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f, --0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f, --0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f, --0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f, --0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f, --0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f, --0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f, --0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f, --0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f, --0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f, --0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f, --0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f, --0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f, --0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f, --0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f, --0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f, --0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f, --0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f, --0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f, --0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f, --0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f, --0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f, --0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f, --0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f, -0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f, -0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f, -0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f, -0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f, -0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f, -0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f, -0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f, -0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f, -0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f, -0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f, -0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f, -0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f, --0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f, --0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f, --0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f, --0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f, --0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f, --0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f, --0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f, --0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f, --0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f, --0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f, --0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f, --0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f, -}; -#endif - -static const CELTMode mode48000_960_120 = { -48000, /* Fs */ -120, /* overlap */ -21, /* nbEBands */ -21, /* effEBands */ -{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, }, /* preemph */ -eband5ms, /* eBands */ -3, /* maxLM */ -8, /* nbShortMdcts */ -120, /* shortMdctSize */ -11, /* nbAllocVectors */ -band_allocation, /* allocVectors */ -logN400, /* logN */ -window120, /* window */ -{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */ -{392, cache_index50, cache_bits50, cache_caps50}, /* cache */ -}; - -/* List of all the available modes */ -#define TOTAL_MODES 1 -static const CELTMode * const static_mode_list[TOTAL_MODES] = { -&mode48000_960_120, -}; diff --git a/thirdparty/opus/celt/static_modes_float_arm_ne10.h b/thirdparty/opus/celt/static_modes_float_arm_ne10.h deleted file mode 100644 index 934a82a420..0000000000 --- a/thirdparty/opus/celt/static_modes_float_arm_ne10.h +++ /dev/null @@ -1,404 +0,0 @@ -/* The contents of this file was automatically generated by - * dump_mode_arm_ne10.c with arguments: 48000 960 - * It contains static definitions for some pre-defined modes. */ -#include - -#ifndef NE10_FFT_PARAMS48000_960 -#define NE10_FFT_PARAMS48000_960 -static const ne10_int32_t ne10_factors_480[64] = { -4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_240[64] = { -3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_120[64] = { -3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_60[64] = { -2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, -{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, -{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, -{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, -{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, -{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, -{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, -{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, -{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, -{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, -{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, -{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, -{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, -{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, -{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, -{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, -{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, -{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, -{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, -{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, -{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, -{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, -{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, -{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, -{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f}, -{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f}, -{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f}, -{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f}, -{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f}, -{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f}, -{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f}, -{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f}, -{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f}, -{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f}, -{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f}, -{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f}, -{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f}, -{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f}, -{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f}, -{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f}, -{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f}, -{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f}, -{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f}, -{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f}, -{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f}, -{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f}, -{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f}, -{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f}, -{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f}, -{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f}, -{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f}, -{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f}, -{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f}, -{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f}, -{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f}, -{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f}, -{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f}, -{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f}, -{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f}, -{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f}, -{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f}, -{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f}, -{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f}, -{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f}, -{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, -{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, -{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, -{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, -{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, -{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, -{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, -{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, -{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, -{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, -{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, -{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, -{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, -{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, -{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, -{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, -{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, -{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, -{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, -{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, -{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f}, -{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f}, -{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f}, -{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f}, -{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f}, -{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f}, -{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f}, -{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f}, -{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f}, -{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f}, -{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f}, -{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f}, -{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f}, -{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f}, -{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f}, -{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f}, -{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f}, -{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f}, -{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f}, -{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f}, -{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f}, -{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f}, -{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f}, -{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f}, -{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f}, -{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f}, -{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f}, -{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f}, -{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f}, -{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f}, -{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f}, -{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f}, -{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f}, -{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f}, -{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f}, -{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f}, -{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f}, -{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f}, -{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f}, -{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f}, -{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f}, -{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f}, -{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f}, -{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f}, -{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f}, -{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f}, -{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f}, -{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f}, -{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f}, -{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f}, -{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f}, -{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f}, -{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f}, -{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f}, -{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f}, -{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f}, -{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f}, -{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f}, -{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f}, -{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f}, -}; -static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, -{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, -{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, -{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, -{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, -{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, -{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, -{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, -{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, -{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, -{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, -{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, -{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, -{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, -{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, -{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, -{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, -{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, -{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, -{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, -{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, -{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, -{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, -{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, -{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, -{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, -{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, -{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, -{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, -{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, -{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, -{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, -{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, -{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, -{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, -{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f}, -{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f}, -{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f}, -{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f}, -{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f}, -{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f}, -{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f}, -{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f}, -{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f}, -{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f}, -{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f}, -{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f}, -{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f}, -{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f}, -{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f}, -{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f}, -{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f}, -{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f}, -{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f}, -{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f}, -{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f}, -{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f}, -{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f}, -{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f}, -{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f}, -{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f}, -{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f}, -{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f}, -{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f}, -{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f}, -}; -static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, -{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, -{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, -{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, -{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, -{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, -{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, -{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, -{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, -{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, -{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, -{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, -{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, -{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, -{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, -{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, -{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, -{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, -{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, -{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, -{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, -{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, -{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, -{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, -}; -static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, -{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, -{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, -{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, -{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, -}; -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = { -120, -(ne10_int32_t *)ne10_factors_480, -(ne10_fft_cpx_float32_t *)ne10_twiddles_480, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_480 = { -1, -(void *)&ne10_fft_state_float32_t_480, -}; - -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = { -60, -(ne10_int32_t *)ne10_factors_240, -(ne10_fft_cpx_float32_t *)ne10_twiddles_240, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_240 = { -1, -(void *)&ne10_fft_state_float32_t_240, -}; - -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = { -30, -(ne10_int32_t *)ne10_factors_120, -(ne10_fft_cpx_float32_t *)ne10_twiddles_120, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_120 = { -1, -(void *)&ne10_fft_state_float32_t_120, -}; - -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = { -15, -(ne10_int32_t *)ne10_factors_60, -(ne10_fft_cpx_float32_t *)ne10_twiddles_60, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_60 = { -1, -(void *)&ne10_fft_state_float32_t_60, -}; - -#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/thirdparty/opus/celt/tests/test_unit_cwrs32.c b/thirdparty/opus/celt/tests/test_unit_cwrs32.c deleted file mode 100644 index 36dd8af5f5..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_cwrs32.c +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#else -#define TEST_CUSTOM_MODES -#endif - -#define CELT_C -#include "stack_alloc.h" -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" -#include "cwrs.c" -#include "mathops.c" -#include "rate.h" - -#define NMAX (240) -#define KMAX (128) - -#ifdef TEST_CUSTOM_MODES - -#define NDIMS (44) -static const int pn[NDIMS]={ - 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 18, 20, 22, - 24, 26, 28, 30, 32, 36, 40, 44, 48, - 52, 56, 60, 64, 72, 80, 88, 96, 104, - 112, 120, 128, 144, 160, 176, 192, 208 -}; -static const int pkmax[NDIMS]={ - 128, 128, 128, 128, 88, 52, 36, 26, 22, - 18, 16, 15, 13, 12, 12, 11, 10, 9, - 9, 8, 8, 7, 7, 7, 7, 6, 6, - 6, 6, 6, 5, 5, 5, 5, 5, 5, - 4, 4, 4, 4, 4, 4, 4, 4 -}; - -#else /* TEST_CUSTOM_MODES */ - -#define NDIMS (22) -static const int pn[NDIMS]={ - 2, 3, 4, 6, 8, 9, 11, 12, 16, - 18, 22, 24, 32, 36, 44, 48, 64, 72, - 88, 96, 144, 176 -}; -static const int pkmax[NDIMS]={ - 128, 128, 128, 88, 36, 26, 18, 16, 12, - 11, 9, 9, 7, 7, 6, 6, 5, 5, - 5, 5, 4, 4 -}; - -#endif - -int main(void){ - int t; - int n; - ALLOC_STACK; - for(t=0;tpkmax[t])break; - printf("Testing CWRS with N=%i, K=%i...\n",n,k); -#if defined(SMALL_FOOTPRINT) - nc=ncwrs_urow(n,k,uu); -#else - nc=CELT_PVQ_V(n,k); -#endif - inc=nc/20000; - if(inc<1)inc=1; - for(i=0;i");*/ -#if defined(SMALL_FOOTPRINT) - ii=icwrs(n,k,&v,y,u); -#else - ii=icwrs(n,y); - v=CELT_PVQ_V(n,k); -#endif - if(ii!=i){ - fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n", - (long)ii,(long)i); - return 1; - } - if(v!=nc){ - fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n", - (long)v,(long)nc); - return 2; - } - /*printf(" %6u\n",i);*/ - } - /*printf("\n");*/ - } - } - return 0; -} diff --git a/thirdparty/opus/celt/tests/test_unit_dft.c b/thirdparty/opus/celt/tests/test_unit_dft.c deleted file mode 100644 index 6166eb0e4f..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_dft.c +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define SKIP_CONFIG_H - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#include - -#define CELT_C -#define TEST_UNIT_DFT_C -#include "stack_alloc.h" -#include "kiss_fft.h" -#include "kiss_fft.c" -#include "mathops.c" -#include "entcode.c" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/x86cpu.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# include "celt_lpc.c" -# include "pitch.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "mdct.c" -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -int ret = 0; - -void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0, snr; - - for (bin=0;bin1) { - int k; - for (k=1;k -#include -#include -#include -#include "entcode.h" -#include "entenc.h" -#include "entdec.h" -#include - -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" - -#ifndef M_LOG2E -# define M_LOG2E 1.4426950408889634074 -#endif -#define DATA_SIZE 10000000 -#define DATA_SIZE2 10000 - -int main(int _argc,char **_argv){ - ec_enc enc; - ec_dec dec; - long nbits; - long nbits2; - double entropy; - int ft; - int ftb; - int sz; - int i; - int ret; - unsigned int sym; - unsigned int seed; - unsigned char *ptr; - const char *env_seed; - ret=0; - entropy=0; - if (_argc > 2) { - fprintf(stderr, "Usage: %s []\n", _argv[0]); - return 1; - } - env_seed = getenv("SEED"); - if (_argc > 1) - seed = atoi(_argv[1]); - else if (env_seed) - seed = atoi(env_seed); - else - seed = time(NULL); - /*Testing encoding of raw bit values.*/ - ptr = (unsigned char *)malloc(DATA_SIZE); - ec_enc_init(&enc,ptr, DATA_SIZE); - for(ft=2;ft<1024;ft++){ - for(i=0;i>(rand()%11U))+1U)+10; - sz=rand()/((RAND_MAX>>(rand()%9U))+1U); - data=(unsigned *)malloc(sz*sizeof(*data)); - tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); - ec_enc_init(&enc,ptr,DATA_SIZE2); - zeros = rand()%13==0; - tell[0]=ec_tell_frac(&enc); - for(j=0;j>(rand()%9U))+1U); - logp1=(unsigned *)malloc(sz*sizeof(*logp1)); - data=(unsigned *)malloc(sz*sizeof(*data)); - tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); - enc_method=(unsigned *)malloc(sz*sizeof(*enc_method)); - ec_enc_init(&enc,ptr,DATA_SIZE2); - tell[0]=ec_tell_frac(&enc); - for(j=0;j>1)+1); - logp1[j]=(rand()%15)+1; - enc_method[j]=rand()/((RAND_MAX>>2)+1); - switch(enc_method[j]){ - case 0:{ - ec_encode(&enc,data[j]?(1<>2)+1); - switch(dec_method){ - case 0:{ - fs=ec_decode(&dec,1<=(1<=(1< -#include -#include "laplace.h" -#define CELT_C -#include "stack_alloc.h" - -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" -#include "laplace.c" - -#define DATA_SIZE 40000 - -int ec_laplace_get_start_freq(int decay) -{ - opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1); - int fs = (ft*(16384-decay))/(16384+decay); - return fs+LAPLACE_MINP; -} - -int main(void) -{ - int i; - int ret = 0; - ec_enc enc; - ec_dec dec; - unsigned char *ptr; - int val[10000], decay[10000]; - ALLOC_STACK; - ptr = (unsigned char *)malloc(DATA_SIZE); - ec_enc_init(&enc,ptr,DATA_SIZE); - - val[0] = 3; decay[0] = 6000; - val[1] = 0; decay[1] = 5800; - val[2] = -1; decay[2] = 5600; - for (i=3;i<10000;i++) - { - val[i] = rand()%15-7; - decay[i] = rand()%11000+5000; - } - for (i=0;i<10000;i++) - ec_laplace_encode(&enc, &val[i], - ec_laplace_get_start_freq(decay[i]), decay[i]); - - ec_enc_done(&enc); - - ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc)); - - for (i=0;i<10000;i++) - { - int d = ec_laplace_decode(&dec, - ec_laplace_get_start_freq(decay[i]), decay[i]); - if (d != val[i]) - { - fprintf (stderr, "Got %d instead of %d\n", d, val[i]); - ret = 1; - } - } - - free(ptr); - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_mathops.c b/thirdparty/opus/celt/tests/test_unit_mathops.c deleted file mode 100644 index fd3319da91..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_mathops.c +++ /dev/null @@ -1,304 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#define CELT_C - -#include -#include -#include "mathops.c" -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" -#include "bands.c" -#include "quant_bands.c" -#include "laplace.c" -#include "vq.c" -#include "cwrs.c" -#include "pitch.c" -#include "celt_lpc.c" -#include "celt.c" - -#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# if defined(OPUS_X86_MAY_HAVE_SSE) -# include "x86/pitch_sse.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# include "x86/pitch_sse2.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/pitch_sse4_1.c" -# include "x86/celt_lpc_sse.c" -# endif -# include "x86/x86_celt_map.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "kiss_fft.c" -# include "mdct.c" -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#ifdef FIXED_POINT -#define WORD "%d" -#else -#define WORD "%f" -#endif - -int ret = 0; - -void testdiv(void) -{ - opus_int32 i; - for (i=1;i<=327670;i++) - { - double prod; - opus_val32 val; - val = celt_rcp(i); -#ifdef FIXED_POINT - prod = (1./32768./65526.)*val*i; -#else - prod = val*i; -#endif - if (fabs(prod-1) > .00025) - { - fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod); - ret = 1; - } - } -} - -void testsqrt(void) -{ - opus_int32 i; - for (i=1;i<=1000000000;i++) - { - double ratio; - opus_val16 val; - val = celt_sqrt(i); - ratio = val/sqrt(i); - if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2) - { - fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio); - ret = 1; - } - i+= i>>10; - } -} - -void testbitexactcos(void) -{ - int i; - opus_int32 min_d,max_d,last,chk; - chk=max_d=0; - last=min_d=32767; - for(i=64;i<=16320;i++) - { - opus_int32 d; - opus_int32 q=bitexact_cos(i); - chk ^= q*i; - d = last - q; - if (d>max_d)max_d=d; - if (dmax_d)max_d=d; - if (d0.0009) - { - fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} - -void testexp2(void) -{ - float x; - for (x=-11.0;x<24.0;x+=0.0007) - { - float error = fabs(x-(1.442695040888963387*log(celt_exp2(x)))); - if (error>0.0002) - { - fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} - -void testexp2log2(void) -{ - float x; - for (x=-11.0;x<24.0;x+=0.0007) - { - float error = fabs(x-(celt_log2(celt_exp2(x)))); - if (error>0.001) - { - fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} -#else -void testlog2(void) -{ - opus_val32 x; - for (x=8;x<1073741824;x+=(x>>3)) - { - float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0); - if (error>0.003) - { - fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error); - ret = 1; - } - } -} - -void testexp2(void) -{ - opus_val16 x; - for (x=-32768;x<15360;x++) - { - float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0))); - float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0); - if (error1>0.0002&&error2>0.00004) - { - fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2); - ret = 1; - } - } -} - -void testexp2log2(void) -{ - opus_val32 x; - for (x=8;x<65536;x+=(x>>3)) - { - float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384; - if (error>0.004) - { - fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error); - ret = 1; - } - } -} - -void testilog2(void) -{ - opus_val32 x; - for (x=1;x<=268435455;x+=127) - { - opus_val32 lg; - opus_val32 y; - - lg = celt_ilog2(x); - if (lg<0 || lg>=31) - { - printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg); - ret = 1; - } - y = 1<>1)>=y) - { - printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y); - ret = 1; - } - } -} -#endif - -int main(void) -{ - testbitexactcos(); - testbitexactlog2tan(); - testdiv(); - testsqrt(); - testlog2(); - testexp2(); - testexp2log2(); -#ifdef FIXED_POINT - testilog2(); -#endif - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_mdct.c b/thirdparty/opus/celt/tests/test_unit_mdct.c deleted file mode 100644 index 8dbb9caa2e..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_mdct.c +++ /dev/null @@ -1,230 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define SKIP_CONFIG_H - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#include - -#define CELT_C -#include "mdct.h" -#include "stack_alloc.h" - -#include "kiss_fft.c" -#include "mdct.c" -#include "mathops.c" -#include "entcode.c" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/x86cpu.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# include "pitch.c" -# include "celt_lpc.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -int ret = 0; -void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0; - double snr; - for (bin=0;bin1) { - int k; - for (k=1;k -#include -#include "vq.c" -#include "cwrs.c" -#include "entcode.c" -#include "entenc.c" -#include "entdec.c" -#include "mathops.c" -#include "bands.h" -#include "pitch.c" -#include "celt_lpc.c" -#include "celt.c" -#include - -#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# if defined(OPUS_X86_MAY_HAVE_SSE) -# include "x86/pitch_sse.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# include "x86/pitch_sse2.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/pitch_sse4_1.c" -# include "x86/celt_lpc_sse.c" -# endif -# include "x86/x86_celt_map.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "kiss_fft.c" -# include "mdct.c" -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#define MAX_SIZE 100 - -int ret=0; -void test_rotation(int N, int K) -{ - int i; - double err = 0, ener = 0, snr, snr0; - opus_val16 x0[MAX_SIZE]; - opus_val16 x1[MAX_SIZE]; - for (i=0;i 20) - { - fprintf(stderr, "FAIL!\n"); - ret = 1; - } -} - -int main(void) -{ - ALLOC_STACK; - test_rotation(15, 3); - test_rotation(23, 5); - test_rotation(50, 3); - test_rotation(80, 1); - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_types.c b/thirdparty/opus/celt/tests/test_unit_types.c deleted file mode 100644 index 67a0fb8ed3..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_types.c +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_types.h" -#include - -int main(void) -{ - opus_int16 i = 1; - i <<= 14; - if (i>>14 != 1) - { - fprintf(stderr, "opus_int16 isn't 16 bits\n"); - return 1; - } - if (sizeof(opus_int16)*2 != sizeof(opus_int32)) - { - fprintf(stderr, "16*2 != 32\n"); - return 1; - } - return 0; -} diff --git a/thirdparty/opus/celt/vq.c b/thirdparty/opus/celt/vq.c deleted file mode 100644 index d29f38fd8e..0000000000 --- a/thirdparty/opus/celt/vq.c +++ /dev/null @@ -1,408 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mathops.h" -#include "cwrs.h" -#include "vq.h" -#include "arch.h" -#include "os_support.h" -#include "bands.h" -#include "rate.h" -#include "pitch.h" - -#ifndef OVERRIDE_vq_exp_rotation1 -static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) -{ - int i; - opus_val16 ms; - celt_norm *Xptr; - Xptr = X; - ms = NEG16(s); - for (i=0;i=0;i--) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } -} -#endif /* OVERRIDE_vq_exp_rotation1 */ - -static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) -{ - static const int SPREAD_FACTOR[3]={15,10,5}; - int i; - opus_val16 c, s; - opus_val16 gain, theta; - int stride2=0; - int factor; - - if (2*K>=len || spread==SPREAD_NONE) - return; - factor = SPREAD_FACTOR[spread-1]; - - gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); - theta = HALF16(MULT16_16_Q15(gain,gain)); - - c = celt_cos_norm(EXTEND32(theta)); - s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ - - if (len>=8*stride) - { - stride2 = 1; - /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. - It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ - while ((stride2*stride2+stride2)*stride + (stride>>2) < len) - stride2++; - } - /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for - extract_collapse_mask().*/ - len = celt_udiv(len, stride); - for (i=0;i>1; -#endif - t = VSHR32(Ryy, 2*(k-7)); - g = MULT16_16_P15(celt_rsqrt_norm(t),gain); - - i=0; - do - X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1)); - while (++i < N); -} - -static unsigned extract_collapse_mask(int *iy, int N, int B) -{ - unsigned collapse_mask; - int N0; - int i; - if (B<=1) - return 1; - /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for - exp_rotation().*/ - N0 = celt_udiv(N, B); - collapse_mask = 0; - i=0; do { - int j; - unsigned tmp=0; - j=0; do { - tmp |= iy[i*N0+j]; - } while (++j0, "alg_quant() needs at least one pulse"); - celt_assert2(N>1, "alg_quant() needs at least two dimensions"); - - ALLOC(y, N, celt_norm); - ALLOC(iy, N, int); - ALLOC(signx, N, opus_val16); - - exp_rotation(X, N, 1, B, K, spread); - - /* Get rid of the sign */ - sum = 0; - j=0; do { - if (X[j]>0) - signx[j]=1; - else { - signx[j]=-1; - X[j]=-X[j]; - } - iy[j] = 0; - y[j] = 0; - } while (++j (N>>1)) - { - opus_val16 rcp; - j=0; do { - sum += X[j]; - } while (++j EPSILON && sum < 64)) -#endif - { - X[0] = QCONST16(1.f,14); - j=1; do - X[j]=0; - while (++j=1, "Allocated too many pulses in the quick pass"); - - /* This should never happen, but just in case it does (e.g. on silence) - we fill the first bin with pulses. */ -#ifdef FIXED_POINT_DEBUG - celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); -#endif - if (pulsesLeft > N+3) - { - opus_val16 tmp = (opus_val16)pulsesLeft; - yy = MAC16_16(yy, tmp, tmp); - yy = MAC16_16(yy, tmp, y[0]); - iy[0] += pulsesLeft; - pulsesLeft=0; - } - - s = 1; - for (i=0;i= best_num/best_den, but that way - we can do it without any division */ - /* OPT: Make sure to use conditional moves here */ - if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) - { - best_den = Ryy; - best_num = Rxy; - best_id = j; - } - } while (++j0, "alg_unquant() needs at least one pulse"); - celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); - ALLOC(iy, N, int); - Ryy = decode_pulses(iy, N, K, dec); - normalise_residual(iy, X, N, Ryy, gain); - exp_rotation(X, N, -1, B, K, spread); - collapse_mask = extract_collapse_mask(iy, N, B); - RESTORE_STACK; - return collapse_mask; -} - -#ifndef OVERRIDE_renormalise_vector -void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - opus_val32 E; - opus_val16 g; - opus_val32 t; - celt_norm *xptr; - E = EPSILON + celt_inner_prod(X, X, N, arch); -#ifdef FIXED_POINT - k = celt_ilog2(E)>>1; -#endif - t = VSHR32(E, 2*(k-7)); - g = MULT16_16_P15(celt_rsqrt_norm(t),gain); - - xptr = X; - for (i=0;i -#include -#include -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" -#include "x86cpu.h" - -#if defined(FIXED_POINT) - -void celt_fir_sse4_1(const opus_val16 *_x, - const opus_val16 *num, - opus_val16 *_y, - int N, - int ord, - opus_val16 *mem, - int arch) -{ - int i,j; - VARDECL(opus_val16, rnum); - VARDECL(opus_val16, x); - - __m128i vecNoA; - opus_int32 noA ; - SAVE_STACK; - - ALLOC(rnum, ord, opus_val16); - ALLOC(x, N+ord, opus_val16); - for(i=0;i> 1; - vecNoA = _mm_set_epi32(noA, noA, noA, noA); - - for (i=0;i -#include "arch.h" - -void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) -{ - int j; - __m128 xsum1, xsum2; - xsum1 = _mm_loadu_ps(sum); - xsum2 = _mm_setzero_ps(); - - for (j = 0; j < len-3; j += 4) - { - __m128 x0 = _mm_loadu_ps(x+j); - __m128 yj = _mm_loadu_ps(y+j); - __m128 y3 = _mm_loadu_ps(y+j+3); - - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), - _mm_shuffle_ps(yj,y3,0x49))); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), - _mm_shuffle_ps(yj,y3,0x9e))); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); - } - if (j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - } - } - } - _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); -} - - -void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - __m128 xsum1, xsum2; - xsum1 = _mm_setzero_ps(); - xsum2 = _mm_setzero_ps(); - for (i=0;i -#include - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y, - int N) -{ - opus_int i, dataSize16; - opus_int32 sum; - - __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; - __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; - - sum = 0; - dataSize16 = N & ~15; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for (i=0;i= 8) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - i += 8; - } - - acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1)); - acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E)); - sum += _mm_cvtsi128_si32(acc1); - - for (;i -#include - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -#include -#include "x86cpu.h" - -opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y, - int N) -{ - opus_int i, dataSize16; - opus_int32 sum; - __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; - __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; - __m128i inVec1_3210, inVec2_3210; - - sum = 0; - dataSize16 = N & ~15; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for (i=0;i= 8) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - i += 8; - } - - if (N - i >= 4) - { - inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]); - inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]); - - inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210); - - acc1 = _mm_add_epi32(acc1, inVec1_3210); - i += 4; - } - - acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); - acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); - - sum += _mm_cvtsi128_si32(acc1); - - for (;i= 3); - - sum0 = _mm_setzero_si128(); - sum1 = _mm_setzero_si128(); - sum2 = _mm_setzero_si128(); - sum3 = _mm_setzero_si128(); - - for (j=0;j<(len-7);j+=8) - { - vecX = _mm_loadu_si128((__m128i *)(&x[j + 0])); - vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0])); - vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1])); - vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2])); - vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3])); - - sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); - sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); - sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); - sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3)); - } - - sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0)); - sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E)); - - sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1)); - sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E)); - - sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2)); - sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E)); - - sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3)); - sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E)); - - vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1), - _mm_unpacklo_epi32(sum2, sum3)); - - for (;j<(len-3);j+=4) - { - vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); - vecX0 = _mm_shuffle_epi32(vecX, 0x00); - vecX1 = _mm_shuffle_epi32(vecX, 0x55); - vecX2 = _mm_shuffle_epi32(vecX, 0xaa); - vecX3 = _mm_shuffle_epi32(vecX, 0xff); - - vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); - vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); - vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]); - vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]); - - sum0 = _mm_mullo_epi32(vecX0, vecY0); - sum1 = _mm_mullo_epi32(vecX1, vecY1); - sum2 = _mm_mullo_epi32(vecX2, vecY2); - sum3 = _mm_mullo_epi32(vecX3, vecY3); - - sum0 = _mm_add_epi32(sum0, sum1); - sum2 = _mm_add_epi32(sum2, sum3); - vecSum = _mm_add_epi32(vecSum, sum0); - vecSum = _mm_add_epi32(vecSum, sum2); - } - - for (;j -static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) -{ - __cpuid((int*)CPUInfo, InfoType); -} - -#else - -#if defined(CPU_INFO_BY_C) -#include -#endif - -static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) -{ -#if defined(CPU_INFO_BY_ASM) -#if defined(__i386__) && defined(__PIC__) -/* %ebx is PIC register in 32-bit, so mustn't clobber it. */ - __asm__ __volatile__ ( - "xchg %%ebx, %1\n" - "cpuid\n" - "xchg %%ebx, %1\n": - "=a" (CPUInfo[0]), - "=r" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "0" (InfoType) - ); -#else - __asm__ __volatile__ ( - "cpuid": - "=a" (CPUInfo[0]), - "=b" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "0" (InfoType) - ); -#endif -#elif defined(CPU_INFO_BY_C) - __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])); -#endif -} - -#endif - -typedef struct CPU_Feature{ - /* SIMD: 128-bit */ - int HW_SSE; - int HW_SSE2; - int HW_SSE41; - /* SIMD: 256-bit */ - int HW_AVX; -} CPU_Feature; - -static void opus_cpu_feature_check(CPU_Feature *cpu_feature) -{ - unsigned int info[4] = {0}; - unsigned int nIds = 0; - - cpuid(info, 0); - nIds = info[0]; - - if (nIds >= 1){ - cpuid(info, 1); - cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; - cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; - cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; - cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; - } - else { - cpu_feature->HW_SSE = 0; - cpu_feature->HW_SSE2 = 0; - cpu_feature->HW_SSE41 = 0; - cpu_feature->HW_AVX = 0; - } -} - -int opus_select_arch(void) -{ - CPU_Feature cpu_feature; - int arch; - - opus_cpu_feature_check(&cpu_feature); - - arch = 0; - if (!cpu_feature.HW_SSE) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_SSE2) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_SSE41) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_AVX) - { - return arch; - } - arch++; - - return arch; -} - -#endif diff --git a/thirdparty/opus/celt/x86/x86cpu.h b/thirdparty/opus/celt/x86/x86cpu.h deleted file mode 100644 index 04fd48aac4..0000000000 --- a/thirdparty/opus/celt/x86/x86cpu.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(X86CPU_H) -# define X86CPU_H - -# if defined(OPUS_X86_MAY_HAVE_SSE) -# define MAY_HAVE_SSE(name) name ## _sse -# else -# define MAY_HAVE_SSE(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# define MAY_HAVE_SSE2(name) name ## _sse2 -# else -# define MAY_HAVE_SSE2(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 -# else -# define MAY_HAVE_SSE4_1(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_AVX) -# define MAY_HAVE_AVX(name) name ## _avx -# else -# define MAY_HAVE_AVX(name) name ## _c -# endif - -# if defined(OPUS_HAVE_RTCD) -int opus_select_arch(void); -# endif - -/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() - or _mm_cvtepi16_epi32() when optimizations are disabled, even though the - actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory - reference, these require 16-byte alignment and load a full 16 bytes (instead - of 4 or 8), possibly reading out of bounds. - - We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or - _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 - reference in the PMOVSXWD instruction itself, but gcc is not smart enough to - optimize this out when optimizations ARE enabled. - - Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 - (which is fair, since technically the compiler is always allowed to do the - dereference before invoking the function implementing the intrinsic). - However, it is smart enough to eliminate the extra MOVD instruction. - For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out - the extra MOVQ if it's specified explicitly */ - -# if defined(__clang__) || !defined(__OPTIMIZE__) -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) -# else -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(*(__m128i *)(x))) -#endif - -# if !defined(__OPTIMIZE__) -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) -# else -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(*(__m128i *)(x))) -# endif - -#endif diff --git a/thirdparty/opus/config.h b/thirdparty/opus/config.h deleted file mode 100644 index 7b9c92c6a8..0000000000 --- a/thirdparty/opus/config.h +++ /dev/null @@ -1,133 +0,0 @@ -/* Opus configuration header */ -/* Based on the output of libopus configure script */ - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -#if (!defined( _MSC_VER ) || ( _MSC_VER >= 1800 )) - -/* Define to 1 if you have the `lrint' function. */ -#define HAVE_LRINT 1 - -/* Define to 1 if you have the `lrintf' function. */ -#define HAVE_LRINTF 1 - -#endif - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -#ifdef OPUS_ARM_OPT -/* Make use of ARM asm optimization */ -#define OPUS_ARM_ASM 1 - -/* Use generic ARMv4 inline asm optimizations */ -#define OPUS_ARM_INLINE_ASM 1 - -/* Use ARMv5E inline asm optimizations */ -#define OPUS_ARM_INLINE_EDSP 1 - -/* Use ARMv6 inline asm optimizations */ -#define OPUS_ARM_INLINE_MEDIA 1 - -/* Use ARM NEON inline asm optimizations */ -#define OPUS_ARM_INLINE_NEON 1 - -/* Define if assembler supports EDSP instructions */ -#define OPUS_ARM_MAY_HAVE_EDSP 1 - -/* Define if assembler supports ARMv6 media instructions */ -#define OPUS_ARM_MAY_HAVE_MEDIA 1 - -/* Define if compiler supports NEON instructions */ -#define OPUS_ARM_MAY_HAVE_NEON 1 -#endif // OPUS_ARM_OPT - -#ifdef OPUS_ARM64_OPT -/* Make use of ARM asm optimization */ -#define OPUS_ARM_ASM 1 - -/* Use ARMv6 inline asm optimizations */ -#define OPUS_ARM_INLINE_MEDIA 1 // work - -/* Use ARM NEON inline asm optimizations */ -#define OPUS_ARM_INLINE_NEON 1 // work - -/* Define if assembler supports EDSP instructions */ -#define OPUS_ARM_MAY_HAVE_EDSP 1 // work - -/* Define if assembler supports ARMv6 media instructions */ -#define OPUS_ARM_MAY_HAVE_MEDIA 1 // work - -/* Define if compiler supports NEON instructions */ -#define OPUS_ARM_MAY_HAVE_NEON 1 - -#endif // OPUS_ARM64_OPT - -/* This is a build of OPUS */ -#define OPUS_BUILD /**/ - -#ifndef WIN32 - /* Use C99 variable-size arrays */ - #define VAR_ARRAYS 1 -#else - /* Fixes VS 2013 compile error */ - #define USE_ALLOCA 1 -#endif - -#ifndef OPUS_FIXED_POINT -#define FLOAT_APPROX 1 -#endif - - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to the equivalent of the C99 'restrict' keyword, or to - nothing if this is not supported. Do not define if restrict is - supported directly. */ -#if (!defined( _MSC_VER ) || ( _MSC_VER >= 1800 )) -#define restrict __restrict -#else -#undef restrict -#endif -/* Work around a bug in Sun C++: it does not support _Restrict or - __restrict__, even though the corresponding Sun C compiler ends up with - "#define restrict _Restrict" or "#define restrict __restrict__" in the - previous line. Perhaps some future version of Sun C++ will work with - restrict; if so, hopefully it defines __RESTRICT like Sun C does. */ -#if defined __SUNPRO_CC && !defined __RESTRICT -# define _Restrict -# define __restrict__ -#endif diff --git a/thirdparty/opus/info.c b/thirdparty/opus/info.c deleted file mode 100644 index c36f9a9ee1..0000000000 --- a/thirdparty/opus/info.c +++ /dev/null @@ -1,758 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" -#include -#include - -static unsigned op_parse_uint16le(const unsigned char *_data){ - return _data[0]|_data[1]<<8; -} - -static int op_parse_int16le(const unsigned char *_data){ - int ret; - ret=_data[0]|_data[1]<<8; - return (ret^0x8000)-0x8000; -} - -static opus_uint32 op_parse_uint32le(const unsigned char *_data){ - return _data[0]|(opus_uint32)_data[1]<<8| - (opus_uint32)_data[2]<<16|(opus_uint32)_data[3]<<24; -} - -static opus_uint32 op_parse_uint32be(const unsigned char *_data){ - return _data[3]|(opus_uint32)_data[2]<<8| - (opus_uint32)_data[1]<<16|(opus_uint32)_data[0]<<24; -} - -int opus_head_parse(OpusHead *_head,const unsigned char *_data,size_t _len){ - OpusHead head; - if(_len<8)return OP_ENOTFORMAT; - if(memcmp(_data,"OpusHead",8)!=0)return OP_ENOTFORMAT; - if(_len<9)return OP_EBADHEADER; - head.version=_data[8]; - if(head.version>15)return OP_EVERSION; - if(_len<19)return OP_EBADHEADER; - head.channel_count=_data[9]; - head.pre_skip=op_parse_uint16le(_data+10); - head.input_sample_rate=op_parse_uint32le(_data+12); - head.output_gain=op_parse_int16le(_data+16); - head.mapping_family=_data[18]; - if(head.mapping_family==0){ - if(head.channel_count<1||head.channel_count>2)return OP_EBADHEADER; - if(head.version<=1&&_len>19)return OP_EBADHEADER; - head.stream_count=1; - head.coupled_count=head.channel_count-1; - if(_head!=NULL){ - _head->mapping[0]=0; - _head->mapping[1]=1; - } - } - else if(head.mapping_family==1){ - size_t size; - int ci; - if(head.channel_count<1||head.channel_count>8)return OP_EBADHEADER; - size=21+head.channel_count; - if(_lensize)return OP_EBADHEADER; - head.stream_count=_data[19]; - if(head.stream_count<1)return OP_EBADHEADER; - head.coupled_count=_data[20]; - if(head.coupled_count>head.stream_count)return OP_EBADHEADER; - for(ci=0;ci=head.stream_count+head.coupled_count - &&_data[21+ci]!=255){ - return OP_EBADHEADER; - } - } - if(_head!=NULL)memcpy(_head->mapping,_data+21,head.channel_count); - } - /*General purpose players should not attempt to play back content with - channel mapping family 255.*/ - else if(head.mapping_family==255)return OP_EIMPL; - /*No other channel mapping families are currently defined.*/ - else return OP_EBADHEADER; - if(_head!=NULL)memcpy(_head,&head,head.mapping-(unsigned char *)&head); - return 0; -} - -void opus_tags_init(OpusTags *_tags){ - memset(_tags,0,sizeof(*_tags)); -} - -void opus_tags_clear(OpusTags *_tags){ - int ncomments; - int ci; - ncomments=_tags->comments; - if(_tags->user_comments!=NULL)ncomments++; - for(ci=ncomments;ci-->0;)_ogg_free(_tags->user_comments[ci]); - _ogg_free(_tags->user_comments); - _ogg_free(_tags->comment_lengths); - _ogg_free(_tags->vendor); -} - -/*Ensure there's room for up to _ncomments comments.*/ -static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){ - char **user_comments; - int *comment_lengths; - int cur_ncomments; - char *binary_suffix_data; - int binary_suffix_len; - size_t size; - if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT; - size=sizeof(*_tags->comment_lengths)*(_ncomments+1); - if(size/sizeof(*_tags->comment_lengths)!=_ncomments+1)return OP_EFAULT; - cur_ncomments=_tags->comments; - comment_lengths=_tags->comment_lengths; - binary_suffix_len=comment_lengths==NULL?0:comment_lengths[cur_ncomments]; - comment_lengths=(int *)_ogg_realloc(_tags->comment_lengths,size); - if(OP_UNLIKELY(comment_lengths==NULL))return OP_EFAULT; - comment_lengths[_ncomments]=binary_suffix_len; - _tags->comment_lengths=comment_lengths; - size=sizeof(*_tags->user_comments)*(_ncomments+1); - if(size/sizeof(*_tags->user_comments)!=_ncomments+1)return OP_EFAULT; - user_comments=_tags->user_comments; - binary_suffix_data=user_comments==NULL?NULL:user_comments[cur_ncomments]; - user_comments=(char **)_ogg_realloc(_tags->user_comments,size); - if(OP_UNLIKELY(user_comments==NULL))return OP_EFAULT; - user_comments[_ncomments]=binary_suffix_data; - _tags->user_comments=user_comments; - return 0; -} - -/*Duplicate a (possibly non-NUL terminated) string with a known length.*/ -static char *op_strdup_with_len(const char *_s,size_t _len){ - size_t size; - char *ret; - size=sizeof(*ret)*(_len+1); - if(OP_UNLIKELY(size<_len))return NULL; - ret=(char *)_ogg_malloc(size); - if(OP_LIKELY(ret!=NULL)){ - ret=(char *)memcpy(ret,_s,sizeof(*ret)*_len); - ret[_len]='\0'; - } - return ret; -} - -/*The actual implementation of opus_tags_parse(). - Unlike the public API, this function requires _tags to already be - initialized, modifies its contents before success is guaranteed, and assumes - the caller will clear it on error.*/ -static int opus_tags_parse_impl(OpusTags *_tags, - const unsigned char *_data,size_t _len){ - opus_uint32 count; - size_t len; - int ncomments; - int ci; - len=_len; - if(len<8)return OP_ENOTFORMAT; - if(memcmp(_data,"OpusTags",8)!=0)return OP_ENOTFORMAT; - if(len<16)return OP_EBADHEADER; - _data+=8; - len-=8; - count=op_parse_uint32le(_data); - _data+=4; - len-=4; - if(count>len)return OP_EBADHEADER; - if(_tags!=NULL){ - _tags->vendor=op_strdup_with_len((char *)_data,count); - if(_tags->vendor==NULL)return OP_EFAULT; - } - _data+=count; - len-=count; - if(len<4)return OP_EBADHEADER; - count=op_parse_uint32le(_data); - _data+=4; - len-=4; - /*Check to make sure there's minimally sufficient data left in the packet.*/ - if(count>len>>2)return OP_EBADHEADER; - /*Check for overflow (the API limits this to an int).*/ - if(count>(opus_uint32)INT_MAX-1)return OP_EFAULT; - if(_tags!=NULL){ - int ret; - ret=op_tags_ensure_capacity(_tags,count); - if(ret<0)return ret; - } - ncomments=(int)count; - for(ci=0;cilen>>2)return OP_EBADHEADER; - count=op_parse_uint32le(_data); - _data+=4; - len-=4; - if(count>len)return OP_EBADHEADER; - /*Check for overflow (the API limits this to an int).*/ - if(count>(opus_uint32)INT_MAX)return OP_EFAULT; - if(_tags!=NULL){ - _tags->user_comments[ci]=op_strdup_with_len((char *)_data,count); - if(_tags->user_comments[ci]==NULL)return OP_EFAULT; - _tags->comment_lengths[ci]=(int)count; - _tags->comments=ci+1; - /*Needed by opus_tags_clear() if we fail before parsing the (optional) - binary metadata.*/ - _tags->user_comments[ci+1]=NULL; - } - _data+=count; - len-=count; - } - if(len>0&&(_data[0]&1)){ - if(len>(opus_uint32)INT_MAX)return OP_EFAULT; - if(_tags!=NULL){ - _tags->user_comments[ncomments]=(char *)_ogg_malloc(len); - if(OP_UNLIKELY(_tags->user_comments[ncomments]==NULL))return OP_EFAULT; - memcpy(_tags->user_comments[ncomments],_data,len); - _tags->comment_lengths[ncomments]=(int)len; - } - } - return 0; -} - -int opus_tags_parse(OpusTags *_tags,const unsigned char *_data,size_t _len){ - if(_tags!=NULL){ - OpusTags tags; - int ret; - opus_tags_init(&tags); - ret=opus_tags_parse_impl(&tags,_data,_len); - if(ret<0)opus_tags_clear(&tags); - else *_tags=*&tags; - return ret; - } - else return opus_tags_parse_impl(NULL,_data,_len); -} - -/*The actual implementation of opus_tags_copy(). - Unlike the public API, this function requires _dst to already be - initialized, modifies its contents before success is guaranteed, and assumes - the caller will clear it on error.*/ -static int opus_tags_copy_impl(OpusTags *_dst,const OpusTags *_src){ - char *vendor; - int ncomments; - int ret; - int ci; - vendor=_src->vendor; - _dst->vendor=op_strdup_with_len(vendor,strlen(vendor)); - if(OP_UNLIKELY(_dst->vendor==NULL))return OP_EFAULT; - ncomments=_src->comments; - ret=op_tags_ensure_capacity(_dst,ncomments); - if(OP_UNLIKELY(ret<0))return ret; - for(ci=0;cicomment_lengths[ci]; - OP_ASSERT(len>=0); - _dst->user_comments[ci]=op_strdup_with_len(_src->user_comments[ci],len); - if(OP_UNLIKELY(_dst->user_comments[ci]==NULL))return OP_EFAULT; - _dst->comment_lengths[ci]=len; - _dst->comments=ci+1; - } - if(_src->comment_lengths!=NULL){ - int len; - len=_src->comment_lengths[ncomments]; - if(len>0){ - _dst->user_comments[ncomments]=(char *)_ogg_malloc(len); - if(OP_UNLIKELY(_dst->user_comments[ncomments]==NULL))return OP_EFAULT; - memcpy(_dst->user_comments[ncomments],_src->user_comments[ncomments],len); - _dst->comment_lengths[ncomments]=len; - } - } - return 0; -} - -int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){ - OpusTags dst; - int ret; - opus_tags_init(&dst); - ret=opus_tags_copy_impl(&dst,_src); - if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst); - else *_dst=*&dst; - return 0; -} - -int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){ - char *comment; - int tag_len; - int value_len; - int ncomments; - int ret; - ncomments=_tags->comments; - ret=op_tags_ensure_capacity(_tags,ncomments+1); - if(OP_UNLIKELY(ret<0))return ret; - tag_len=strlen(_tag); - value_len=strlen(_value); - /*+2 for '=' and '\0'.*/ - comment=(char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2)); - if(OP_UNLIKELY(comment==NULL))return OP_EFAULT; - memcpy(comment,_tag,sizeof(*comment)*tag_len); - comment[tag_len]='='; - memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1)); - _tags->user_comments[ncomments]=comment; - _tags->comment_lengths[ncomments]=tag_len+value_len+1; - _tags->comments=ncomments+1; - return 0; -} - -int opus_tags_add_comment(OpusTags *_tags,const char *_comment){ - char *comment; - int comment_len; - int ncomments; - int ret; - ncomments=_tags->comments; - ret=op_tags_ensure_capacity(_tags,ncomments+1); - if(OP_UNLIKELY(ret<0))return ret; - comment_len=(int)strlen(_comment); - comment=op_strdup_with_len(_comment,comment_len); - if(OP_UNLIKELY(comment==NULL))return OP_EFAULT; - _tags->user_comments[ncomments]=comment; - _tags->comment_lengths[ncomments]=comment_len; - _tags->comments=ncomments+1; - return 0; -} - -int opus_tags_set_binary_suffix(OpusTags *_tags, - const unsigned char *_data,int _len){ - unsigned char *binary_suffix_data; - int ncomments; - int ret; - if(_len<0||_len>0&&(_data==NULL||!(_data[0]&1)))return OP_EINVAL; - ncomments=_tags->comments; - ret=op_tags_ensure_capacity(_tags,ncomments); - if(OP_UNLIKELY(ret<0))return ret; - binary_suffix_data= - (unsigned char *)_ogg_realloc(_tags->user_comments[ncomments],_len); - if(OP_UNLIKELY(binary_suffix_data==NULL))return OP_EFAULT; - memcpy(binary_suffix_data,_data,_len); - _tags->user_comments[ncomments]=(char *)binary_suffix_data; - _tags->comment_lengths[ncomments]=_len; - return 0; -} - -int opus_tagcompare(const char *_tag_name,const char *_comment){ - return opus_tagncompare(_tag_name,strlen(_tag_name),_comment); -} - -int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){ - int ret; - OP_ASSERT(_tag_len>=0); - ret=op_strncasecmp(_tag_name,_comment,_tag_len); - return ret?ret:'='-_comment[_tag_len]; -} - -const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){ - char **user_comments; - int tag_len; - int found; - int ncomments; - int ci; - tag_len=strlen(_tag); - ncomments=_tags->comments; - user_comments=_tags->user_comments; - found=0; - for(ci=0;cicomments; - user_comments=_tags->user_comments; - found=0; - for(ci=0;cicomments; - len=_tags->comment_lengths==NULL?0:_tags->comment_lengths[ncomments]; - *_len=len; - OP_ASSERT(len==0||_tags->user_comments!=NULL); - return len>0?(const unsigned char *)_tags->user_comments[ncomments]:NULL; -} - -static int opus_tags_get_gain(const OpusTags *_tags,int *_gain_q8, - const char *_tag_name,size_t _tag_len){ - char **comments; - int ncomments; - int ci; - comments=_tags->user_comments; - ncomments=_tags->comments; - /*Look for the first valid tag with the name _tag_name and use that.*/ - for(ci=0;ci='0'&&*p<='9'){ - gain_q8=10*gain_q8+*p-'0'; - if(gain_q8>32767-negative)break; - p++; - } - /*This didn't look like a signed 16-bit decimal integer. - Not a valid gain tag.*/ - if(*p!='\0')continue; - *_gain_q8=(int)(gain_q8+negative^negative); - return 0; - } - } - return OP_FALSE; -} - -int opus_tags_get_album_gain(const OpusTags *_tags,int *_gain_q8){ - return opus_tags_get_gain(_tags,_gain_q8,"R128_ALBUM_GAIN",15); -} - -int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){ - return opus_tags_get_gain(_tags,_gain_q8,"R128_TRACK_GAIN",15); -} - -static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=11&&memcmp(_buf,"\xFF\xD8\xFF\xE0",4)==0 - &&(_buf[4]<<8|_buf[5])>=16&&memcmp(_buf+6,"JFIF",5)==0; -} - -/*Tries to extract the width, height, bits per pixel, and palette size of a - JPEG. - On failure, simply leaves its outputs unmodified.*/ -static void op_extract_jpeg_params(const unsigned char *_buf,size_t _buf_sz, - opus_uint32 *_width,opus_uint32 *_height, - opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ - if(op_is_jpeg(_buf,_buf_sz)){ - size_t offs; - offs=2; - for(;;){ - size_t segment_len; - int marker; - while(offs<_buf_sz&&_buf[offs]!=0xFF)offs++; - while(offs<_buf_sz&&_buf[offs]==0xFF)offs++; - marker=_buf[offs]; - offs++; - /*If we hit EOI* (end of image), or another SOI* (start of image), - or SOS (start of scan), then stop now.*/ - if(offs>=_buf_sz||(marker>=0xD8&&marker<=0xDA))break; - /*RST* (restart markers): skip (no segment length).*/ - else if(marker>=0xD0&&marker<=0xD7)continue; - /*Read the length of the marker segment.*/ - if(_buf_sz-offs<2)break; - segment_len=_buf[offs]<<8|_buf[offs+1]; - if(segment_len<2||_buf_sz-offs0xC0&&marker<0xD0&&(marker&3)!=0)){ - /*Found a SOFn (start of frame) marker segment:*/ - if(segment_len>=8){ - *_height=_buf[offs+3]<<8|_buf[offs+4]; - *_width=_buf[offs+5]<<8|_buf[offs+6]; - *_depth=_buf[offs+2]*_buf[offs+7]; - *_colors=0; - *_has_palette=0; - } - break; - } - /*Other markers: skip the whole marker segment.*/ - offs+=segment_len; - } - } -} - -static int op_is_png(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=8&&memcmp(_buf,"\x89PNG\x0D\x0A\x1A\x0A",8)==0; -} - -/*Tries to extract the width, height, bits per pixel, and palette size of a - PNG. - On failure, simply leaves its outputs unmodified.*/ -static void op_extract_png_params(const unsigned char *_buf,size_t _buf_sz, - opus_uint32 *_width,opus_uint32 *_height, - opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ - if(op_is_png(_buf,_buf_sz)){ - size_t offs; - offs=8; - while(_buf_sz-offs>=12){ - ogg_uint32_t chunk_len; - chunk_len=op_parse_uint32be(_buf+offs); - if(chunk_len>_buf_sz-(offs+12))break; - else if(chunk_len==13&&memcmp(_buf+offs+4,"IHDR",4)==0){ - int color_type; - *_width=op_parse_uint32be(_buf+offs+8); - *_height=op_parse_uint32be(_buf+offs+12); - color_type=_buf[offs+17]; - if(color_type==3){ - *_depth=24; - *_has_palette=1; - } - else{ - int sample_depth; - sample_depth=_buf[offs+16]; - if(color_type==0)*_depth=sample_depth; - else if(color_type==2)*_depth=sample_depth*3; - else if(color_type==4)*_depth=sample_depth*2; - else if(color_type==6)*_depth=sample_depth*4; - *_colors=0; - *_has_palette=0; - break; - } - } - else if(*_has_palette>0&&memcmp(_buf+offs+4,"PLTE",4)==0){ - *_colors=chunk_len/3; - break; - } - offs+=12+chunk_len; - } - } -} - -static int op_is_gif(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=6&&(memcmp(_buf,"GIF87a",6)==0||memcmp(_buf,"GIF89a",6)==0); -} - -/*Tries to extract the width, height, bits per pixel, and palette size of a - GIF. - On failure, simply leaves its outputs unmodified.*/ -static void op_extract_gif_params(const unsigned char *_buf,size_t _buf_sz, - opus_uint32 *_width,opus_uint32 *_height, - opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ - if(op_is_gif(_buf,_buf_sz)&&_buf_sz>=14){ - *_width=_buf[6]|_buf[7]<<8; - *_height=_buf[8]|_buf[9]<<8; - /*libFLAC hard-codes the depth to 24.*/ - *_depth=24; - *_colors=1<<((_buf[10]&7)+1); - *_has_palette=1; - } -} - -/*The actual implementation of opus_picture_tag_parse(). - Unlike the public API, this function requires _pic to already be - initialized, modifies its contents before success is guaranteed, and assumes - the caller will clear it on error.*/ -static int opus_picture_tag_parse_impl(OpusPictureTag *_pic,const char *_tag, - unsigned char *_buf,size_t _buf_sz,size_t _base64_sz){ - opus_int32 picture_type; - opus_uint32 mime_type_length; - char *mime_type; - opus_uint32 description_length; - char *description; - opus_uint32 width; - opus_uint32 height; - opus_uint32 depth; - opus_uint32 colors; - opus_uint32 data_length; - opus_uint32 file_width; - opus_uint32 file_height; - opus_uint32 file_depth; - opus_uint32 file_colors; - int format; - int has_palette; - int colors_set; - size_t i; - /*Decode the BASE64 data.*/ - for(i=0;i<_base64_sz;i++){ - opus_uint32 value; - int j; - value=0; - for(j=0;j<4;j++){ - unsigned c; - unsigned d; - c=(unsigned char)_tag[4*i+j]; - if(c=='+')d=62; - else if(c=='/')d=63; - else if(c>='0'&&c<='9')d=52+c-'0'; - else if(c>='a'&&c<='z')d=26+c-'a'; - else if(c>='A'&&c<='Z')d=c-'A'; - else if(c=='='&&3*i+j>_buf_sz)d=0; - else return OP_ENOTFORMAT; - value=value<<6|d; - } - _buf[3*i]=(unsigned char)(value>>16); - if(3*i+1<_buf_sz){ - _buf[3*i+1]=(unsigned char)(value>>8); - if(3*i+2<_buf_sz)_buf[3*i+2]=(unsigned char)value; - } - } - i=0; - picture_type=op_parse_uint32be(_buf+i); - i+=4; - /*Extract the MIME type.*/ - mime_type_length=op_parse_uint32be(_buf+i); - i+=4; - if(mime_type_length>_buf_sz-32)return OP_ENOTFORMAT; - mime_type=(char *)_ogg_malloc(sizeof(*_pic->mime_type)*(mime_type_length+1)); - if(mime_type==NULL)return OP_EFAULT; - memcpy(mime_type,_buf+i,sizeof(*mime_type)*mime_type_length); - mime_type[mime_type_length]='\0'; - _pic->mime_type=mime_type; - i+=mime_type_length; - /*Extract the description string.*/ - description_length=op_parse_uint32be(_buf+i); - i+=4; - if(description_length>_buf_sz-mime_type_length-32)return OP_ENOTFORMAT; - description= - (char *)_ogg_malloc(sizeof(*_pic->mime_type)*(description_length+1)); - if(description==NULL)return OP_EFAULT; - memcpy(description,_buf+i,sizeof(*description)*description_length); - description[description_length]='\0'; - _pic->description=description; - i+=description_length; - /*Extract the remaining fields.*/ - width=op_parse_uint32be(_buf+i); - i+=4; - height=op_parse_uint32be(_buf+i); - i+=4; - depth=op_parse_uint32be(_buf+i); - i+=4; - colors=op_parse_uint32be(_buf+i); - i+=4; - /*If one of these is set, they all must be, but colors==0 is a valid value.*/ - colors_set=width!=0||height!=0||depth!=0||colors!=0; - if((width==0||height==0||depth==0)&&colors_set)return OP_ENOTFORMAT; - data_length=op_parse_uint32be(_buf+i); - i+=4; - if(data_length>_buf_sz-i)return OP_ENOTFORMAT; - /*Trim extraneous data so we don't copy it below.*/ - _buf_sz=i+data_length; - /*Attempt to determine the image format.*/ - format=OP_PIC_FORMAT_UNKNOWN; - if(mime_type_length==3&&strcmp(mime_type,"-->")==0){ - format=OP_PIC_FORMAT_URL; - /*Picture type 1 must be a 32x32 PNG.*/ - if(picture_type==1&&(width!=0||height!=0)&&(width!=32||height!=32)){ - return OP_ENOTFORMAT; - } - /*Append a terminating NUL for the convenience of our callers.*/ - _buf[_buf_sz++]='\0'; - } - else{ - if(mime_type_length==10 - &&op_strncasecmp(mime_type,"image/jpeg",mime_type_length)==0){ - if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG; - } - else if(mime_type_length==9 - &&op_strncasecmp(mime_type,"image/png",mime_type_length)==0){ - if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG; - } - else if(mime_type_length==9 - &&op_strncasecmp(mime_type,"image/gif",mime_type_length)==0){ - if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF; - } - else if(mime_type_length==0||(mime_type_length==6 - &&op_strncasecmp(mime_type,"image/",mime_type_length)==0)){ - if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG; - else if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG; - else if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF; - } - file_width=file_height=file_depth=file_colors=0; - has_palette=-1; - switch(format){ - case OP_PIC_FORMAT_JPEG:{ - op_extract_jpeg_params(_buf+i,data_length, - &file_width,&file_height,&file_depth,&file_colors,&has_palette); - }break; - case OP_PIC_FORMAT_PNG:{ - op_extract_png_params(_buf+i,data_length, - &file_width,&file_height,&file_depth,&file_colors,&has_palette); - }break; - case OP_PIC_FORMAT_GIF:{ - op_extract_gif_params(_buf+i,data_length, - &file_width,&file_height,&file_depth,&file_colors,&has_palette); - }break; - } - if(has_palette>=0){ - /*If we successfully extracted these parameters from the image, override - any declared values.*/ - width=file_width; - height=file_height; - depth=file_depth; - colors=file_colors; - } - /*Picture type 1 must be a 32x32 PNG.*/ - if(picture_type==1&&(format!=OP_PIC_FORMAT_PNG||width!=32||height!=32)){ - return OP_ENOTFORMAT; - } - } - /*Adjust _buf_sz instead of using data_length to capture the terminating NUL - for URLs.*/ - _buf_sz-=i; - memmove(_buf,_buf+i,sizeof(*_buf)*_buf_sz); - _buf=(unsigned char *)_ogg_realloc(_buf,_buf_sz); - if(_buf_sz>0&&_buf==NULL)return OP_EFAULT; - _pic->type=picture_type; - _pic->width=width; - _pic->height=height; - _pic->depth=depth; - _pic->colors=colors; - _pic->data_length=data_length; - _pic->data=_buf; - _pic->format=format; - return 0; -} - -int opus_picture_tag_parse(OpusPictureTag *_pic,const char *_tag){ - OpusPictureTag pic; - unsigned char *buf; - size_t base64_sz; - size_t buf_sz; - size_t tag_length; - int ret; - if(opus_tagncompare("METADATA_BLOCK_PICTURE",22,_tag)==0)_tag+=23; - /*Figure out how much BASE64-encoded data we have.*/ - tag_length=strlen(_tag); - if(tag_length&3)return OP_ENOTFORMAT; - base64_sz=tag_length>>2; - buf_sz=3*base64_sz; - if(buf_sz<32)return OP_ENOTFORMAT; - if(_tag[tag_length-1]=='=')buf_sz--; - if(_tag[tag_length-2]=='=')buf_sz--; - if(buf_sz<32)return OP_ENOTFORMAT; - /*Allocate an extra byte to allow appending a terminating NUL to URL data.*/ - buf=(unsigned char *)_ogg_malloc(sizeof(*buf)*(buf_sz+1)); - if(buf==NULL)return OP_EFAULT; - opus_picture_tag_init(&pic); - ret=opus_picture_tag_parse_impl(&pic,_tag,buf,buf_sz,base64_sz); - if(ret<0){ - opus_picture_tag_clear(&pic); - _ogg_free(buf); - } - else *_pic=*&pic; - return ret; -} - -void opus_picture_tag_init(OpusPictureTag *_pic){ - memset(_pic,0,sizeof(*_pic)); -} - -void opus_picture_tag_clear(OpusPictureTag *_pic){ - _ogg_free(_pic->description); - _ogg_free(_pic->mime_type); - _ogg_free(_pic->data); -} diff --git a/thirdparty/opus/internal.c b/thirdparty/opus/internal.c deleted file mode 100644 index 96c80def82..0000000000 --- a/thirdparty/opus/internal.c +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" - -#if defined(OP_ENABLE_ASSERTIONS) -void op_fatal_impl(const char *_str,const char *_file,int _line){ - fprintf(stderr,"Fatal (internal) error in %s, line %i: %s\n", - _file,_line,_str); - abort(); -} -#endif - -/*A version of strncasecmp() that is guaranteed to only ignore the case of - ASCII characters.*/ -int op_strncasecmp(const char *_a,const char *_b,int _n){ - int i; - for(i=0;i<_n;i++){ - int a; - int b; - int d; - a=_a[i]; - b=_b[i]; - if(a>='a'&&a<='z')a-='a'-'A'; - if(b>='a'&&b<='z')b-='a'-'A'; - d=a-b; - if(d)return d; - } - return 0; -} diff --git a/thirdparty/opus/internal.h b/thirdparty/opus/internal.h deleted file mode 100644 index ee48ea34c9..0000000000 --- a/thirdparty/opus/internal.h +++ /dev/null @@ -1,254 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ********************************************************************/ -#if !defined(_opusfile_internal_h) -# define _opusfile_internal_h (1) - -# if !defined(_REENTRANT) -# define _REENTRANT -# endif -# if !defined(_GNU_SOURCE) -# define _GNU_SOURCE -# endif -# if !defined(_LARGEFILE_SOURCE) -# define _LARGEFILE_SOURCE -# endif -# if !defined(_LARGEFILE64_SOURCE) -# define _LARGEFILE64_SOURCE -# endif -# if !defined(_FILE_OFFSET_BITS) -# define _FILE_OFFSET_BITS 64 -# endif - -# include -# include - -typedef struct OggOpusLink OggOpusLink; - -# if defined(OP_FIXED_POINT) - -typedef opus_int16 op_sample; - -# else - -typedef float op_sample; - -/*We're using this define to test for libopus 1.1 or later until libopus - provides a better mechanism.*/ -# if defined(OPUS_GET_EXPERT_FRAME_DURATION_REQUEST) -/*Enable soft clipping prevention in 16-bit decodes.*/ -# define OP_SOFT_CLIP (1) -# endif - -# endif - -# if OP_GNUC_PREREQ(4,2) -/*Disable excessive warnings about the order of operations.*/ -# pragma GCC diagnostic ignored "-Wparentheses" -# elif defined(_MSC_VER) -/*Disable excessive warnings about the order of operations.*/ -# pragma warning(disable:4554) -/*Disable warnings about "deprecated" POSIX functions.*/ -# pragma warning(disable:4996) -# endif - -# if OP_GNUC_PREREQ(3,0) -/*Another alternative is - (__builtin_constant_p(_x)?!!(_x):__builtin_expect(!!(_x),1)) - but that evaluates _x multiple times, which may be bad.*/ -# define OP_LIKELY(_x) (__builtin_expect(!!(_x),1)) -# define OP_UNLIKELY(_x) (__builtin_expect(!!(_x),0)) -# else -# define OP_LIKELY(_x) (!!(_x)) -# define OP_UNLIKELY(_x) (!!(_x)) -# endif - -# if defined(OP_ENABLE_ASSERTIONS) -# if OP_GNUC_PREREQ(2,5)||__SUNPRO_C>=0x590 -__attribute__((noreturn)) -# endif -void op_fatal_impl(const char *_str,const char *_file,int _line); - -# define OP_FATAL(_str) (op_fatal_impl(_str,__FILE__,__LINE__)) - -# define OP_ASSERT(_cond) \ - do{ \ - if(OP_UNLIKELY(!(_cond)))OP_FATAL("assertion failed: " #_cond); \ - } \ - while(0) -# define OP_ALWAYS_TRUE(_cond) OP_ASSERT(_cond) - -# else -# define OP_FATAL(_str) abort() -# define OP_ASSERT(_cond) -# define OP_ALWAYS_TRUE(_cond) ((void)(_cond)) -# endif - -# define OP_INT64_MAX (2*(((ogg_int64_t)1<<62)-1)|1) -# define OP_INT64_MIN (-OP_INT64_MAX-1) -# define OP_INT32_MAX (2*(((ogg_int32_t)1<<30)-1)|1) -# define OP_INT32_MIN (-OP_INT32_MAX-1) - -# define OP_MIN(_a,_b) ((_a)<(_b)?(_a):(_b)) -# define OP_MAX(_a,_b) ((_a)>(_b)?(_a):(_b)) -# define OP_CLAMP(_lo,_x,_hi) (OP_MAX(_lo,OP_MIN(_x,_hi))) - -/*Advance a file offset by the given amount, clamping against OP_INT64_MAX. - This is used to advance a known offset by things like OP_CHUNK_SIZE or - OP_PAGE_SIZE_MAX, while making sure to avoid signed overflow. - It assumes that both _offset and _amount are non-negative.*/ -#define OP_ADV_OFFSET(_offset,_amount) \ - (OP_MIN(_offset,OP_INT64_MAX-(_amount))+(_amount)) - -/*The maximum channel count for any mapping we'll actually decode.*/ -# define OP_NCHANNELS_MAX (8) - -/*Initial state.*/ -# define OP_NOTOPEN (0) -/*We've found the first Opus stream in the first link.*/ -# define OP_PARTOPEN (1) -# define OP_OPENED (2) -/*We've found the first Opus stream in the current link.*/ -# define OP_STREAMSET (3) -/*We've initialized the decoder for the chosen Opus stream in the current - link.*/ -# define OP_INITSET (4) - -/*Information cached for a single link in a chained Ogg Opus file. - We choose the first Opus stream encountered in each link to play back (and - require at least one).*/ -struct OggOpusLink{ - /*The byte offset of the first header page in this link.*/ - opus_int64 offset; - /*The byte offset of the first data page from the chosen Opus stream in this - link (after the headers).*/ - opus_int64 data_offset; - /*The byte offset of the last page from the chosen Opus stream in this link. - This is used when seeking to ensure we find a page before the last one, so - that end-trimming calculations work properly. - This is only valid for seekable sources.*/ - opus_int64 end_offset; - /*The granule position of the last sample. - This is only valid for seekable sources.*/ - ogg_int64_t pcm_end; - /*The granule position before the first sample.*/ - ogg_int64_t pcm_start; - /*The serial number.*/ - ogg_uint32_t serialno; - /*The contents of the info header.*/ - OpusHead head; - /*The contents of the comment header.*/ - OpusTags tags; -}; - -struct OggOpusFile{ - /*The callbacks used to access the data source.*/ - OpusFileCallbacks callbacks; - /*A FILE *, memory bufer, etc.*/ - void *source; - /*Whether or not we can seek with this data source.*/ - int seekable; - /*The number of links in this chained Ogg Opus file.*/ - int nlinks; - /*The cached information from each link in a chained Ogg Opus file. - If source isn't seekable (e.g., it's a pipe), only the current link - appears.*/ - OggOpusLink *links; - /*The number of serial numbers from a single link.*/ - int nserialnos; - /*The capacity of the list of serial numbers from a single link.*/ - int cserialnos; - /*Storage for the list of serial numbers from a single link.*/ - ogg_uint32_t *serialnos; - /*This is the current offset of the data processed by the ogg_sync_state. - After a seek, this should be set to the target offset so that we can track - the byte offsets of subsequent pages. - After a call to op_get_next_page(), this will point to the first byte after - that page.*/ - opus_int64 offset; - /*The total size of this data source, or -1 if it's unseekable.*/ - opus_int64 end; - /*Used to locate pages in the data source.*/ - ogg_sync_state oy; - /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/ - int ready_state; - /*The current link being played back.*/ - int cur_link; - /*The number of decoded samples to discard from the start of decoding.*/ - opus_int32 cur_discard_count; - /*The granule position of the previous packet (current packet start time).*/ - ogg_int64_t prev_packet_gp; - /*The stream offset of the most recent page with completed packets, or -1. - This is only needed to recover continued packet data in the seeking logic, - when we use the current position as one of our bounds, only to later - discover it was the correct starting point.*/ - opus_int64 prev_page_offset; - /*The number of bytes read since the last bitrate query, including framing.*/ - opus_int64 bytes_tracked; - /*The number of samples decoded since the last bitrate query.*/ - ogg_int64_t samples_tracked; - /*Takes physical pages and welds them into a logical stream of packets.*/ - ogg_stream_state os; - /*Re-timestamped packets from a single page. - Buffering these relies on the undocumented libogg behavior that ogg_packet - pointers remain valid until the next page is submitted to the - ogg_stream_state they came from.*/ - ogg_packet op[255]; - /*The index of the next packet to return.*/ - int op_pos; - /*The total number of packets available.*/ - int op_count; - /*Central working state for the packet-to-PCM decoder.*/ - OpusMSDecoder *od; - /*The application-provided packet decode callback.*/ - op_decode_cb_func decode_cb; - /*The application-provided packet decode callback context.*/ - void *decode_cb_ctx; - /*The stream count used to initialize the decoder.*/ - int od_stream_count; - /*The coupled stream count used to initialize the decoder.*/ - int od_coupled_count; - /*The channel count used to initialize the decoder.*/ - int od_channel_count; - /*The channel mapping used to initialize the decoder.*/ - unsigned char od_mapping[OP_NCHANNELS_MAX]; - /*The buffered data for one decoded packet.*/ - op_sample *od_buffer; - /*The current position in the decoded buffer.*/ - int od_buffer_pos; - /*The number of valid samples in the decoded buffer.*/ - int od_buffer_size; - /*The type of gain offset to apply. - One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/ - int gain_type; - /*The offset to apply to the gain.*/ - opus_int32 gain_offset_q8; - /*Internal state for soft clipping and dithering float->short output.*/ -#if !defined(OP_FIXED_POINT) -# if defined(OP_SOFT_CLIP) - float clip_state[OP_NCHANNELS_MAX]; -# endif - float dither_a[OP_NCHANNELS_MAX*4]; - float dither_b[OP_NCHANNELS_MAX*4]; - opus_uint32 dither_seed; - int dither_mute; - int dither_disabled; - /*The number of channels represented by the internal state. - This gets set to 0 whenever anything that would prevent state propagation - occurs (switching between the float/short APIs, or between the - stereo/multistream APIs).*/ - int state_channel_count; -#endif -}; - -int op_strncasecmp(const char *_a,const char *_b,int _n); - -#endif diff --git a/thirdparty/opus/mlp.c b/thirdparty/opus/mlp.c deleted file mode 100644 index ff9e50df47..0000000000 --- a/thirdparty/opus/mlp.c +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2008-2011 Octasic Inc. - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_types.h" -#include "opus_defines.h" - -#include -#include "mlp.h" -#include "arch.h" -#include "tansig_table.h" -#define MAX_NEURONS 100 - -#if 0 -static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ -{ - int i; - opus_val16 xx; /* Q11 */ - /*double x, y;*/ - opus_val16 dy, yy; /* Q14 */ - /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(8,19)) - return QCONST32(1.,14); - if (_x<=-QCONST32(8,19)) - return -QCONST32(1.,14); - xx = EXTRACT16(SHR32(_x, 8)); - /*i = lrint(25*x);*/ - i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); - /*x -= .04*i;*/ - xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); - /*x = xx*(1./2048);*/ - /*y = tansig_table[250+i];*/ - yy = tansig_table[250+i]; - /*y = yy*(1./16384);*/ - dy = 16384-MULT16_16_Q14(yy,yy); - yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); - return yy; -} -#else -/*extern const float tansig_table[501];*/ -static OPUS_INLINE float tansig_approx(float x) -{ - int i; - float y, dy; - float sign=1; - /* Tests are reversed to catch NaNs */ - if (!(x<8)) - return 1; - if (!(x>-8)) - return -1; -#ifndef FIXED_POINT - /* Another check in case of -ffast-math */ - if (celt_isnan(x)) - return 0; -#endif - if (x<0) - { - x=-x; - sign=-1; - } - i = (int)floor(.5f+25*x); - x -= .04f*i; - y = tansig_table[i]; - dy = 1-y*y; - y = y + x*dy*(1 - y*x); - return sign*y; -} -#endif - -#if 0 -void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) -{ - int j; - opus_val16 hidden[MAX_NEURONS]; - const opus_val16 *W = m->weights; - /* Copy to tmp_in */ - for (j=0;jtopo[1];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),8); - for (k=0;ktopo[0];k++) - sum = MAC16_16(sum, in[k],*W++); - hidden[j] = tansig_approx(sum); - } - for (j=0;jtopo[2];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),14); - for (k=0;ktopo[1];k++) - sum = MAC16_16(sum, hidden[k], *W++); - out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); - } -} -#else -void mlp_process(const MLP *m, const float *in, float *out) -{ - int j; - float hidden[MAX_NEURONS]; - const float *W = m->weights; - /* Copy to tmp_in */ - for (j=0;jtopo[1];j++) - { - int k; - float sum = *W++; - for (k=0;ktopo[0];k++) - sum = sum + in[k]**W++; - hidden[j] = tansig_approx(sum); - } - for (j=0;jtopo[2];j++) - { - int k; - float sum = *W++; - for (k=0;ktopo[1];k++) - sum = sum + hidden[k]**W++; - out[j] = tansig_approx(sum); - } -} -#endif diff --git a/thirdparty/opus/mlp.h b/thirdparty/opus/mlp.h deleted file mode 100644 index 618e246e2c..0000000000 --- a/thirdparty/opus/mlp.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2008-2011 Octasic Inc. - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _MLP_H_ -#define _MLP_H_ - -#include "arch.h" - -typedef struct { - int layers; - const int *topo; - const float *weights; -} MLP; - -extern const MLP net; - -void mlp_process(const MLP *m, const float *in, float *out); - -#endif /* _MLP_H_ */ diff --git a/thirdparty/opus/mlp_data.c b/thirdparty/opus/mlp_data.c deleted file mode 100644 index c2fda4e2e5..0000000000 --- a/thirdparty/opus/mlp_data.c +++ /dev/null @@ -1,109 +0,0 @@ -/* The contents of this file was automatically generated by mlp_train.c - It contains multi-layer perceptron (MLP) weights. */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mlp.h" - -/* RMS error was 0.138320, seed was 1361535663 */ - -static const float weights[422] = { - -/* hidden layer */ --0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f, --0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f, --0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f, -0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f, -0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f, -24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f, --0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f, --0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f, --0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f, -1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f, -15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f, -0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f, --0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f, -0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f, -0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f, --1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f, --0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f, --0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f, -0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f, --0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f, -2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f, -0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f, --0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f, -0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f, -0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f, --4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f, -5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f, --0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f, --0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f, --0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f, -1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f, --7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f, --0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f, -0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f, -0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f, --0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f, -10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f, --0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f, --0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f, --0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f, -0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f, --0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f, -0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f, -0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f, --0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f, -0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f, --0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f, --0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f, --0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f, --0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f, --0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f, -5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f, -1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f, -0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f, --0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f, -0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f, --0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f, --975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f, -0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f, --0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f, --2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f, -0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f, --6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f, -0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f, --0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f, --0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f, -0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f, --0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f, -0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f, --0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f, -0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f, --2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f, -4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f, -0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f, --0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f, -0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f, -0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f, -3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f, - -/* output layer */ --0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f, -0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f, -0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f, -0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f, -4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f, --1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f, -3.87308f, 3.52558f}; - -static const int topo[3] = {25, 15, 2}; - -const MLP net = { - 3, - topo, - weights -}; diff --git a/thirdparty/opus/opus.c b/thirdparty/opus/opus.c deleted file mode 100644 index f76f125cfa..0000000000 --- a/thirdparty/opus/opus.c +++ /dev/null @@ -1,356 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus.h" -#include "opus_private.h" - -#ifndef DISABLE_FLOAT_API -OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem) -{ - int c; - int i; - float *x; - - if (C<1 || N<1 || !_x || !declip_mem) return; - - /* First thing: saturate everything to +/- 2 which is the highest level our - non-linearity can handle. At the point where the signal reaches +/-2, - the derivative will be zero anyway, so this doesn't introduce any - discontinuity in the derivative. */ - for (i=0;i=0) - break; - x[i*C] = x[i*C]+a*x[i*C]*x[i*C]; - } - - curr=0; - x0 = x[0]; - while(1) - { - int start, end; - float maxval; - int special=0; - int peak_pos; - for (i=curr;i1 || x[i*C]<-1) - break; - } - if (i==N) - { - a=0; - break; - } - peak_pos = i; - start=end=i; - maxval=ABS16(x[i*C]); - /* Look for first zero crossing before clipping */ - while (start>0 && x[i*C]*x[(start-1)*C]>=0) - start--; - /* Look for first zero crossing after clipping */ - while (end=0) - { - /* Look for other peaks until the next zero-crossing. */ - if (ABS16(x[end*C])>maxval) - { - maxval = ABS16(x[end*C]); - peak_pos = end; - } - end++; - } - /* Detect the special case where we clip before the first zero crossing */ - special = (start==0 && x[i*C]*x[0]>=0); - - /* Compute a such that maxval + a*maxval^2 = 1 */ - a=(maxval-1)/(maxval*maxval); - /* Slightly boost "a" by 2^-22. This is just enough to ensure -ffast-math - does not cause output values larger than +/-1, but small enough not - to matter even for 24-bit output. */ - a += a*2.4e-7; - if (x[i*C]>0) - a = -a; - /* Apply soft clipping */ - for (i=start;i=2) - { - /* Add a linear ramp from the first sample to the signal peak. - This avoids a discontinuity at the beginning of the frame. */ - float delta; - float offset = x0-x[0]; - delta = offset / peak_pos; - for (i=curr;i>2; - return 2; - } -} - -static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size) -{ - if (len<1) - { - *size = -1; - return -1; - } else if (data[0]<252) - { - *size = data[0]; - return 1; - } else if (len<2) - { - *size = -1; - return -1; - } else { - *size = 4*data[1] + data[0]; - return 2; - } -} - -int opus_packet_get_samples_per_frame(const unsigned char *data, - opus_int32 Fs) -{ - int audiosize; - if (data[0]&0x80) - { - audiosize = ((data[0]>>3)&0x3); - audiosize = (Fs<>3)&0x3); - if (audiosize == 3) - audiosize = Fs*60/1000; - else - audiosize = (Fs< len) - return OPUS_INVALID_PACKET; - data += bytes; - last_size = len-size[0]; - break; - /* Multiple CBR/VBR frames (from 0 to 120 ms) */ - default: /*case 3:*/ - if (len<1) - return OPUS_INVALID_PACKET; - /* Number of frames encoded in bits 0 to 5 */ - ch = *data++; - count = ch&0x3F; - if (count <= 0 || framesize*count > 5760) - return OPUS_INVALID_PACKET; - len--; - /* Padding flag is bit 6 */ - if (ch&0x40) - { - int p; - do { - int tmp; - if (len<=0) - return OPUS_INVALID_PACKET; - p = *data++; - len--; - tmp = p==255 ? 254: p; - len -= tmp; - pad += tmp; - } while (p==255); - } - if (len<0) - return OPUS_INVALID_PACKET; - /* VBR flag is bit 7 */ - cbr = !(ch&0x80); - if (!cbr) - { - /* VBR case */ - last_size = len; - for (i=0;i len) - return OPUS_INVALID_PACKET; - data += bytes; - last_size -= bytes+size[i]; - } - if (last_size<0) - return OPUS_INVALID_PACKET; - } else if (!self_delimited) - { - /* CBR case */ - last_size = len/count; - if (last_size*count!=len) - return OPUS_INVALID_PACKET; - for (i=0;i len) - return OPUS_INVALID_PACKET; - data += bytes; - /* For CBR packets, apply the size to all the frames. */ - if (cbr) - { - if (size[count-1]*count > len) - return OPUS_INVALID_PACKET; - for (i=0;i last_size) - return OPUS_INVALID_PACKET; - } else - { - /* Because it's not encoded explicitly, it's possible the size of the - last packet (or all the packets, for the CBR case) is larger than - 1275. Reject them here.*/ - if (last_size > 1275) - return OPUS_INVALID_PACKET; - size[count-1] = (opus_int16)last_size; - } - - if (payload_offset) - *payload_offset = (int)(data-data0); - - for (i=0;i - *
  • audio_frame is the audio data in opus_int16 (or float for opus_encode_float())
  • - *
  • frame_size is the duration of the frame in samples (per channel)
  • - *
  • packet is the byte array to which the compressed data is written
  • - *
  • max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended). - * Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.
  • - * - * - * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet. - * The return value can be negative, which indicates that an error has occurred. If the return value - * is 2 bytes or less, then the packet does not need to be transmitted (DTX). - * - * Once the encoder state if no longer needed, it can be destroyed with - * - * @code - * opus_encoder_destroy(enc); - * @endcode - * - * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(), - * then no action is required aside from potentially freeing the memory that was manually - * allocated for it (calling free(enc) for the example above) - * - */ - -/** Opus encoder state. - * This contains the complete state of an Opus encoder. - * It is position independent and can be freely copied. - * @see opus_encoder_create,opus_encoder_init - */ -typedef struct OpusEncoder OpusEncoder; - -/** Gets the size of an OpusEncoder structure. - * @param[in] channels int: Number of channels. - * This must be 1 or 2. - * @returns The size in bytes. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_encoder_get_size(int channels); - -/** - */ - -/** Allocates and initializes an encoder state. - * There are three coding modes: - * - * @ref OPUS_APPLICATION_VOIP gives best quality at a given bitrate for voice - * signals. It enhances the input signal by high-pass filtering and - * emphasizing formants and harmonics. Optionally it includes in-band - * forward error correction to protect against packet loss. Use this - * mode for typical VoIP applications. Because of the enhancement, - * even at high bitrates the output may sound different from the input. - * - * @ref OPUS_APPLICATION_AUDIO gives best quality at a given bitrate for most - * non-voice signals like music. Use this mode for music and mixed - * (music/voice) content, broadcast, and applications requiring less - * than 15 ms of coding delay. - * - * @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY configures low-delay mode that - * disables the speech-optimized mode in exchange for slightly reduced delay. - * This mode can only be set on an newly initialized or freshly reset encoder - * because it changes the codec delay. - * - * This is useful when the caller knows that the speech-optimized modes will not be needed (use with caution). - * @param [in] Fs opus_int32: Sampling rate of input signal (Hz) - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels int: Number of channels (1 or 2) in input signal - * @param [in] application int: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY) - * @param [out] error int*: @ref opus_errorcodes - * @note Regardless of the sampling rate and number channels selected, the Opus encoder - * can switch to a lower audio bandwidth or number of channels if the bitrate - * selected is too low. This also means that it is safe to always use 48 kHz stereo input - * and let the encoder optimize the encoding. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create( - opus_int32 Fs, - int channels, - int application, - int *error -); - -/** Initializes a previously allocated encoder state - * The memory pointed to by st must be at least the size returned by opus_encoder_get_size(). - * This is intended for applications which use their own allocator instead of malloc. - * @see opus_encoder_create(),opus_encoder_get_size() - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @param [in] st OpusEncoder*: Encoder state - * @param [in] Fs opus_int32: Sampling rate of input signal (Hz) - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels int: Number of channels (1 or 2) in input signal - * @param [in] application int: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY) - * @retval #OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_EXPORT int opus_encoder_init( - OpusEncoder *st, - opus_int32 Fs, - int channels, - int application -) OPUS_ARG_NONNULL(1); - -/** Encodes an Opus frame. - * @param [in] st OpusEncoder*: Encoder state - * @param [in] pcm opus_int16*: Input signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16) - * @param [in] frame_size int: Number of samples per channel in the - * input signal. - * This must be an Opus frame size for - * the encoder's sampling rate. - * For example, at 48 kHz the permitted - * values are 120, 240, 480, 960, 1920, - * and 2880. - * Passing in a duration of less than - * 10 ms (480 samples at 48 kHz) will - * prevent the encoder from using the LPC - * or hybrid modes. - * @param [out] data unsigned char*: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes opus_int32: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode( - OpusEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Encodes an Opus frame from floating point input. - * @param [in] st OpusEncoder*: Encoder state - * @param [in] pcm float*: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0. - * Samples with a range beyond +/-1.0 are supported but will - * be clipped by decoders using the integer API and should - * only be used if it is known that the far end supports - * extended dynamic range. - * length is frame_size*channels*sizeof(float) - * @param [in] frame_size int: Number of samples per channel in the - * input signal. - * This must be an Opus frame size for - * the encoder's sampling rate. - * For example, at 48 kHz the permitted - * values are 120, 240, 480, 960, 1920, - * and 2880. - * Passing in a duration of less than - * 10 ms (480 samples at 48 kHz) will - * prevent the encoder from using the LPC - * or hybrid modes. - * @param [out] data unsigned char*: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes opus_int32: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode_float( - OpusEncoder *st, - const float *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Frees an OpusEncoder allocated by opus_encoder_create(). - * @param[in] st OpusEncoder*: State to be freed. - */ -OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st); - -/** Perform a CTL function on an Opus encoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. - * @param st OpusEncoder*: Encoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls or - * @ref opus_encoderctls. - * @see opus_genericctls - * @see opus_encoderctls - */ -OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); -/**@}*/ - -/** @defgroup opus_decoder Opus Decoder - * @{ - * - * @brief This page describes the process and functions used to decode Opus. - * - * The decoding process also starts with creating a decoder - * state. This can be done with: - * @code - * int error; - * OpusDecoder *dec; - * dec = opus_decoder_create(Fs, channels, &error); - * @endcode - * where - * @li Fs is the sampling rate and must be 8000, 12000, 16000, 24000, or 48000 - * @li channels is the number of channels (1 or 2) - * @li error will hold the error code in case of failure (or #OPUS_OK on success) - * @li the return value is a newly created decoder state to be used for decoding - * - * While opus_decoder_create() allocates memory for the state, it's also possible - * to initialize pre-allocated memory: - * @code - * int size; - * int error; - * OpusDecoder *dec; - * size = opus_decoder_get_size(channels); - * dec = malloc(size); - * error = opus_decoder_init(dec, Fs, channels); - * @endcode - * where opus_decoder_get_size() returns the required size for the decoder state. Note that - * future versions of this code may change the size, so no assuptions should be made about it. - * - * The decoder state is always continuous in memory and only a shallow copy is sufficient - * to copy it (e.g. memcpy()) - * - * To decode a frame, opus_decode() or opus_decode_float() must be called with a packet of compressed audio data: - * @code - * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0); - * @endcode - * where - * - * @li packet is the byte array containing the compressed data - * @li len is the exact number of bytes contained in the packet - * @li decoded is the decoded audio data in opus_int16 (or float for opus_decode_float()) - * @li max_size is the max duration of the frame in samples (per channel) that can fit into the decoded_frame array - * - * opus_decode() and opus_decode_float() return the number of samples (per channel) decoded from the packet. - * If that value is negative, then an error has occurred. This can occur if the packet is corrupted or if the audio - * buffer is too small to hold the decoded audio. - * - * Opus is a stateful codec with overlapping blocks and as a result Opus - * packets are not coded independently of each other. Packets must be - * passed into the decoder serially and in the correct order for a correct - * decode. Lost packets can be replaced with loss concealment by calling - * the decoder with a null pointer and zero length for the missing packet. - * - * A single codec state may only be accessed from a single thread at - * a time and any required locking must be performed by the caller. Separate - * streams must be decoded with separate decoder states and can be decoded - * in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK - * defined. - * - */ - -/** Opus decoder state. - * This contains the complete state of an Opus decoder. - * It is position independent and can be freely copied. - * @see opus_decoder_create,opus_decoder_init - */ -typedef struct OpusDecoder OpusDecoder; - -/** Gets the size of an OpusDecoder structure. - * @param [in] channels int: Number of channels. - * This must be 1 or 2. - * @returns The size in bytes. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_size(int channels); - -/** Allocates and initializes a decoder state. - * @param [in] Fs opus_int32: Sample rate to decode at (Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels int: Number of channels (1 or 2) to decode - * @param [out] error int*: #OPUS_OK Success or @ref opus_errorcodes - * - * Internally Opus stores data at 48000 Hz, so that should be the default - * value for Fs. However, the decoder can efficiently decode to buffers - * at 8, 12, 16, and 24 kHz so if for some reason the caller cannot use - * data at the full sample rate, or knows the compressed data doesn't - * use the full frequency range, it can request decoding at a reduced - * rate. Likewise, the decoder is capable of filling in either mono or - * interleaved stereo pcm buffers, at the caller's request. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusDecoder *opus_decoder_create( - opus_int32 Fs, - int channels, - int *error -); - -/** Initializes a previously allocated decoder state. - * The state must be at least the size returned by opus_decoder_get_size(). - * This is intended for applications which use their own allocator instead of malloc. @see opus_decoder_create,opus_decoder_get_size - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @param [in] st OpusDecoder*: Decoder state. - * @param [in] Fs opus_int32: Sampling rate to decode to (Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels int: Number of channels (1 or 2) to decode - * @retval #OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_EXPORT int opus_decoder_init( - OpusDecoder *st, - opus_int32 Fs, - int channels -) OPUS_ARG_NONNULL(1); - -/** Decode an Opus packet. - * @param [in] st OpusDecoder*: Decoder state - * @param [in] data char*: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len opus_int32: Number of bytes in payload* - * @param [out] pcm opus_int16*: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(opus_int16) - * @param [in] frame_size Number of samples per channel of available space in \a pcm. - * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will - * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1), - * then frame_size needs to be exactly the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and - * FEC cases, frame_size must be a multiple of 2.5 ms. - * @param [in] decode_fec int: Flag (0 or 1) to request that any in-band forward error correction data be - * decoded. If no such data is available, the frame is decoded as if it were lost. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode( - OpusDecoder *st, - const unsigned char *data, - opus_int32 len, - opus_int16 *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Decode an Opus packet with floating point output. - * @param [in] st OpusDecoder*: Decoder state - * @param [in] data char*: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len opus_int32: Number of bytes in payload - * @param [out] pcm float*: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(float) - * @param [in] frame_size Number of samples per channel of available space in \a pcm. - * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will - * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1), - * then frame_size needs to be exactly the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and - * FEC cases, frame_size must be a multiple of 2.5 ms. - * @param [in] decode_fec int: Flag (0 or 1) to request that any in-band forward error correction data be - * decoded. If no such data is available the frame is decoded as if it were lost. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode_float( - OpusDecoder *st, - const unsigned char *data, - opus_int32 len, - float *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on an Opus decoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. - * @param st OpusDecoder*: Decoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls or - * @ref opus_decoderctls. - * @see opus_genericctls - * @see opus_decoderctls - */ -OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); - -/** Frees an OpusDecoder allocated by opus_decoder_create(). - * @param[in] st OpusDecoder*: State to be freed. - */ -OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st); - -/** Parse an opus packet into one or more frames. - * Opus_decode will perform this operation internally so most applications do - * not need to use this function. - * This function does not copy the frames, the returned pointers are pointers into - * the input packet. - * @param [in] data char*: Opus packet to be parsed - * @param [in] len opus_int32: size of data - * @param [out] out_toc char*: TOC pointer - * @param [out] frames char*[48] encapsulated frames - * @param [out] size opus_int16[48] sizes of the encapsulated frames - * @param [out] payload_offset int*: returns the position of the payload within the packet (in bytes) - * @returns number of frames - */ -OPUS_EXPORT int opus_packet_parse( - const unsigned char *data, - opus_int32 len, - unsigned char *out_toc, - const unsigned char *frames[48], - opus_int16 size[48], - int *payload_offset -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Gets the bandwidth of an Opus packet. - * @param [in] data char*: Opus packet - * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass) - * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass) - * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass) - * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass) - * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass) - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_bandwidth(const unsigned char *data) OPUS_ARG_NONNULL(1); - -/** Gets the number of samples per frame from an Opus packet. - * @param [in] data char*: Opus packet. - * This must contain at least one byte of - * data. - * @param [in] Fs opus_int32: Sampling rate in Hz. - * This must be a multiple of 400, or - * inaccurate results will be returned. - * @returns Number of samples per frame. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_samples_per_frame(const unsigned char *data, opus_int32 Fs) OPUS_ARG_NONNULL(1); - -/** Gets the number of channels from an Opus packet. - * @param [in] data char*: Opus packet - * @returns Number of channels - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_channels(const unsigned char *data) OPUS_ARG_NONNULL(1); - -/** Gets the number of frames in an Opus packet. - * @param [in] packet char*: Opus packet - * @param [in] len opus_int32: Length of packet - * @returns Number of frames - * @retval OPUS_BAD_ARG Insufficient data was passed to the function - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1); - -/** Gets the number of samples of an Opus packet. - * @param [in] packet char*: Opus packet - * @param [in] len opus_int32: Length of packet - * @param [in] Fs opus_int32: Sampling rate in Hz. - * This must be a multiple of 400, or - * inaccurate results will be returned. - * @returns Number of samples - * @retval OPUS_BAD_ARG Insufficient data was passed to the function - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1); - -/** Gets the number of samples of an Opus packet. - * @param [in] dec OpusDecoder*: Decoder state - * @param [in] packet char*: Opus packet - * @param [in] len opus_int32: Length of packet - * @returns Number of samples - * @retval OPUS_BAD_ARG Insufficient data was passed to the function - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - -/** Applies soft-clipping to bring a float signal within the [-1,1] range. If - * the signal is already in that range, nothing is done. If there are values - * outside of [-1,1], then the signal is clipped as smoothly as possible to - * both fit in the range and avoid creating excessive distortion in the - * process. - * @param [in,out] pcm float*: Input PCM and modified PCM - * @param [in] frame_size int Number of samples per channel to process - * @param [in] channels int: Number of channels - * @param [in,out] softclip_mem float*: State memory for the soft clipping process (one float per channel, initialized to zero) - */ -OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, float *softclip_mem); - - -/**@}*/ - -/** @defgroup opus_repacketizer Repacketizer - * @{ - * - * The repacketizer can be used to merge multiple Opus packets into a single - * packet or alternatively to split Opus packets that have previously been - * merged. Splitting valid Opus packets is always guaranteed to succeed, - * whereas merging valid packets only succeeds if all frames have the same - * mode, bandwidth, and frame size, and when the total duration of the merged - * packet is no more than 120 ms. The 120 ms limit comes from the - * specification and limits decoder memory requirements at a point where - * framing overhead becomes negligible. - * - * The repacketizer currently only operates on elementary Opus - * streams. It will not manipualte multistream packets successfully, except in - * the degenerate case where they consist of data from a single stream. - * - * The repacketizing process starts with creating a repacketizer state, either - * by calling opus_repacketizer_create() or by allocating the memory yourself, - * e.g., - * @code - * OpusRepacketizer *rp; - * rp = (OpusRepacketizer*)malloc(opus_repacketizer_get_size()); - * if (rp != NULL) - * opus_repacketizer_init(rp); - * @endcode - * - * Then the application should submit packets with opus_repacketizer_cat(), - * extract new packets with opus_repacketizer_out() or - * opus_repacketizer_out_range(), and then reset the state for the next set of - * input packets via opus_repacketizer_init(). - * - * For example, to split a sequence of packets into individual frames: - * @code - * unsigned char *data; - * int len; - * while (get_next_packet(&data, &len)) - * { - * unsigned char out[1276]; - * opus_int32 out_len; - * int nb_frames; - * int err; - * int i; - * err = opus_repacketizer_cat(rp, data, len); - * if (err != OPUS_OK) - * { - * release_packet(data); - * return err; - * } - * nb_frames = opus_repacketizer_get_nb_frames(rp); - * for (i = 0; i < nb_frames; i++) - * { - * out_len = opus_repacketizer_out_range(rp, i, i+1, out, sizeof(out)); - * if (out_len < 0) - * { - * release_packet(data); - * return (int)out_len; - * } - * output_next_packet(out, out_len); - * } - * opus_repacketizer_init(rp); - * release_packet(data); - * } - * @endcode - * - * Alternatively, to combine a sequence of frames into packets that each - * contain up to TARGET_DURATION_MS milliseconds of data: - * @code - * // The maximum number of packets with duration TARGET_DURATION_MS occurs - * // when the frame size is 2.5 ms, for a total of (TARGET_DURATION_MS*2/5) - * // packets. - * unsigned char *data[(TARGET_DURATION_MS*2/5)+1]; - * opus_int32 len[(TARGET_DURATION_MS*2/5)+1]; - * int nb_packets; - * unsigned char out[1277*(TARGET_DURATION_MS*2/2)]; - * opus_int32 out_len; - * int prev_toc; - * nb_packets = 0; - * while (get_next_packet(data+nb_packets, len+nb_packets)) - * { - * int nb_frames; - * int err; - * nb_frames = opus_packet_get_nb_frames(data[nb_packets], len[nb_packets]); - * if (nb_frames < 1) - * { - * release_packets(data, nb_packets+1); - * return nb_frames; - * } - * nb_frames += opus_repacketizer_get_nb_frames(rp); - * // If adding the next packet would exceed our target, or it has an - * // incompatible TOC sequence, output the packets we already have before - * // submitting it. - * // N.B., The nb_packets > 0 check ensures we've submitted at least one - * // packet since the last call to opus_repacketizer_init(). Otherwise a - * // single packet longer than TARGET_DURATION_MS would cause us to try to - * // output an (invalid) empty packet. It also ensures that prev_toc has - * // been set to a valid value. Additionally, len[nb_packets] > 0 is - * // guaranteed by the call to opus_packet_get_nb_frames() above, so the - * // reference to data[nb_packets][0] should be valid. - * if (nb_packets > 0 && ( - * ((prev_toc & 0xFC) != (data[nb_packets][0] & 0xFC)) || - * opus_packet_get_samples_per_frame(data[nb_packets], 48000)*nb_frames > - * TARGET_DURATION_MS*48)) - * { - * out_len = opus_repacketizer_out(rp, out, sizeof(out)); - * if (out_len < 0) - * { - * release_packets(data, nb_packets+1); - * return (int)out_len; - * } - * output_next_packet(out, out_len); - * opus_repacketizer_init(rp); - * release_packets(data, nb_packets); - * data[0] = data[nb_packets]; - * len[0] = len[nb_packets]; - * nb_packets = 0; - * } - * err = opus_repacketizer_cat(rp, data[nb_packets], len[nb_packets]); - * if (err != OPUS_OK) - * { - * release_packets(data, nb_packets+1); - * return err; - * } - * prev_toc = data[nb_packets][0]; - * nb_packets++; - * } - * // Output the final, partial packet. - * if (nb_packets > 0) - * { - * out_len = opus_repacketizer_out(rp, out, sizeof(out)); - * release_packets(data, nb_packets); - * if (out_len < 0) - * return (int)out_len; - * output_next_packet(out, out_len); - * } - * @endcode - * - * An alternate way of merging packets is to simply call opus_repacketizer_cat() - * unconditionally until it fails. At that point, the merged packet can be - * obtained with opus_repacketizer_out() and the input packet for which - * opus_repacketizer_cat() needs to be re-added to a newly reinitialized - * repacketizer state. - */ - -typedef struct OpusRepacketizer OpusRepacketizer; - -/** Gets the size of an OpusRepacketizer structure. - * @returns The size in bytes. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_size(void); - -/** (Re)initializes a previously allocated repacketizer state. - * The state must be at least the size returned by opus_repacketizer_get_size(). - * This can be used for applications which use their own allocator instead of - * malloc(). - * It must also be called to reset the queue of packets waiting to be - * repacketized, which is necessary if the maximum packet duration of 120 ms - * is reached or if you wish to submit packets with a different Opus - * configuration (coding mode, audio bandwidth, frame size, or channel count). - * Failure to do so will prevent a new packet from being added with - * opus_repacketizer_cat(). - * @see opus_repacketizer_create - * @see opus_repacketizer_get_size - * @see opus_repacketizer_cat - * @param rp OpusRepacketizer*: The repacketizer state to - * (re)initialize. - * @returns A pointer to the same repacketizer state that was passed in. - */ -OPUS_EXPORT OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1); - -/** Allocates memory and initializes the new repacketizer with - * opus_repacketizer_init(). - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusRepacketizer *opus_repacketizer_create(void); - -/** Frees an OpusRepacketizer allocated by - * opus_repacketizer_create(). - * @param[in] rp OpusRepacketizer*: State to be freed. - */ -OPUS_EXPORT void opus_repacketizer_destroy(OpusRepacketizer *rp); - -/** Add a packet to the current repacketizer state. - * This packet must match the configuration of any packets already submitted - * for repacketization since the last call to opus_repacketizer_init(). - * This means that it must have the same coding mode, audio bandwidth, frame - * size, and channel count. - * This can be checked in advance by examining the top 6 bits of the first - * byte of the packet, and ensuring they match the top 6 bits of the first - * byte of any previously submitted packet. - * The total duration of audio in the repacketizer state also must not exceed - * 120 ms, the maximum duration of a single packet, after adding this packet. - * - * The contents of the current repacketizer state can be extracted into new - * packets using opus_repacketizer_out() or opus_repacketizer_out_range(). - * - * In order to add a packet with a different configuration or to add more - * audio beyond 120 ms, you must clear the repacketizer state by calling - * opus_repacketizer_init(). - * If a packet is too large to add to the current repacketizer state, no part - * of it is added, even if it contains multiple frames, some of which might - * fit. - * If you wish to be able to add parts of such packets, you should first use - * another repacketizer to split the packet into pieces and add them - * individually. - * @see opus_repacketizer_out_range - * @see opus_repacketizer_out - * @see opus_repacketizer_init - * @param rp OpusRepacketizer*: The repacketizer state to which to - * add the packet. - * @param[in] data const unsigned char*: The packet data. - * The application must ensure - * this pointer remains valid - * until the next call to - * opus_repacketizer_init() or - * opus_repacketizer_destroy(). - * @param len opus_int32: The number of bytes in the packet data. - * @returns An error code indicating whether or not the operation succeeded. - * @retval #OPUS_OK The packet's contents have been added to the repacketizer - * state. - * @retval #OPUS_INVALID_PACKET The packet did not have a valid TOC sequence, - * the packet's TOC sequence was not compatible - * with previously submitted packets (because - * the coding mode, audio bandwidth, frame size, - * or channel count did not match), or adding - * this packet would increase the total amount of - * audio stored in the repacketizer state to more - * than 120 ms. - */ -OPUS_EXPORT int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - - -/** Construct a new packet from data previously submitted to the repacketizer - * state via opus_repacketizer_cat(). - * @param rp OpusRepacketizer*: The repacketizer state from which to - * construct the new packet. - * @param begin int: The index of the first frame in the current - * repacketizer state to include in the output. - * @param end int: One past the index of the last frame in the - * current repacketizer state to include in the - * output. - * @param[out] data const unsigned char*: The buffer in which to - * store the output packet. - * @param maxlen opus_int32: The maximum number of bytes to store in - * the output buffer. In order to guarantee - * success, this should be at least - * 1276 for a single frame, - * or for multiple frames, - * 1277*(end-begin). - * However, 1*(end-begin) plus - * the size of all packet data submitted to - * the repacketizer since the last call to - * opus_repacketizer_init() or - * opus_repacketizer_create() is also - * sufficient, and possibly much smaller. - * @returns The total size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BAD_ARG [begin,end) was an invalid range of - * frames (begin < 0, begin >= end, or end > - * opus_repacketizer_get_nb_frames()). - * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the - * complete output packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Return the total number of frames contained in packet data submitted to - * the repacketizer state so far via opus_repacketizer_cat() since the last - * call to opus_repacketizer_init() or opus_repacketizer_create(). - * This defines the valid range of packets that can be extracted with - * opus_repacketizer_out_range() or opus_repacketizer_out(). - * @param rp OpusRepacketizer*: The repacketizer state containing the - * frames. - * @returns The total number of frames contained in the packet data submitted - * to the repacketizer state. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1); - -/** Construct a new packet from data previously submitted to the repacketizer - * state via opus_repacketizer_cat(). - * This is a convenience routine that returns all the data submitted so far - * in a single packet. - * It is equivalent to calling - * @code - * opus_repacketizer_out_range(rp, 0, opus_repacketizer_get_nb_frames(rp), - * data, maxlen) - * @endcode - * @param rp OpusRepacketizer*: The repacketizer state from which to - * construct the new packet. - * @param[out] data const unsigned char*: The buffer in which to - * store the output packet. - * @param maxlen opus_int32: The maximum number of bytes to store in - * the output buffer. In order to guarantee - * success, this should be at least - * 1277*opus_repacketizer_get_nb_frames(rp). - * However, - * 1*opus_repacketizer_get_nb_frames(rp) - * plus the size of all packet data - * submitted to the repacketizer since the - * last call to opus_repacketizer_init() or - * opus_repacketizer_create() is also - * sufficient, and possibly much smaller. - * @returns The total size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the - * complete output packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1); - -/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence). - * @param[in,out] data const unsigned char*: The buffer containing the - * packet to pad. - * @param len opus_int32: The size of the packet. - * This must be at least 1. - * @param new_len opus_int32: The desired size of the packet after padding. - * This must be at least as large as len. - * @returns an error code - * @retval #OPUS_OK \a on success. - * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len); - -/** Remove all padding from a given Opus packet and rewrite the TOC sequence to - * minimize space usage. - * @param[in,out] data const unsigned char*: The buffer containing the - * packet to strip. - * @param len opus_int32: The size of the packet. - * This must be at least 1. - * @returns The new size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BAD_ARG \a len was less than 1. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len); - -/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence). - * @param[in,out] data const unsigned char*: The buffer containing the - * packet to pad. - * @param len opus_int32: The size of the packet. - * This must be at least 1. - * @param new_len opus_int32: The desired size of the packet after padding. - * This must be at least 1. - * @param nb_streams opus_int32: The number of streams (not channels) in the packet. - * This must be at least as large as len. - * @returns an error code - * @retval #OPUS_OK \a on success. - * @retval #OPUS_BAD_ARG \a len was less than 1. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams); - -/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to - * minimize space usage. - * @param[in,out] data const unsigned char*: The buffer containing the - * packet to strip. - * @param len opus_int32: The size of the packet. - * This must be at least 1. - * @param nb_streams opus_int32: The number of streams (not channels) in the packet. - * This must be at least 1. - * @returns The new size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams); - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_H */ diff --git a/thirdparty/opus/opus/opus_custom.h b/thirdparty/opus/opus/opus_custom.h deleted file mode 100644 index 41f36bf2fb..0000000000 --- a/thirdparty/opus/opus/opus_custom.h +++ /dev/null @@ -1,342 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008-2012 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - @file opus_custom.h - @brief Opus-Custom reference implementation API - */ - -#ifndef OPUS_CUSTOM_H -#define OPUS_CUSTOM_H - -#include "opus_defines.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef CUSTOM_MODES -# define OPUS_CUSTOM_EXPORT OPUS_EXPORT -# define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT -#else -# define OPUS_CUSTOM_EXPORT -# ifdef OPUS_BUILD -# define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE -# else -# define OPUS_CUSTOM_EXPORT_STATIC -# endif -#endif - -/** @defgroup opus_custom Opus Custom - * @{ - * Opus Custom is an optional part of the Opus specification and - * reference implementation which uses a distinct API from the regular - * API and supports frame sizes that are not normally supported.\ Use - * of Opus Custom is discouraged for all but very special applications - * for which a frame size different from 2.5, 5, 10, or 20 ms is needed - * (for either complexity or latency reasons) and where interoperability - * is less important. - * - * In addition to the interoperability limitations the use of Opus custom - * disables a substantial chunk of the codec and generally lowers the - * quality available at a given bitrate. Normally when an application needs - * a different frame size from the codec it should buffer to match the - * sizes but this adds a small amount of delay which may be important - * in some very low latency applications. Some transports (especially - * constant rate RF transports) may also work best with frames of - * particular durations. - * - * Libopus only supports custom modes if they are enabled at compile time. - * - * The Opus Custom API is similar to the regular API but the - * @ref opus_encoder_create and @ref opus_decoder_create calls take - * an additional mode parameter which is a structure produced by - * a call to @ref opus_custom_mode_create. Both the encoder and decoder - * must create a mode using the same sample rate (fs) and frame size - * (frame size) so these parameters must either be signaled out of band - * or fixed in a particular implementation. - * - * Similar to regular Opus the custom modes support on the fly frame size - * switching, but the sizes available depend on the particular frame size in - * use. For some initial frame sizes on a single on the fly size is available. - */ - -/** Contains the state of an encoder. One encoder state is needed - for each stream. It is initialized once at the beginning of the - stream. Do *not* re-initialize the state for every frame. - @brief Encoder state - */ -typedef struct OpusCustomEncoder OpusCustomEncoder; - -/** State of the decoder. One decoder state is needed for each stream. - It is initialized once at the beginning of the stream. Do *not* - re-initialize the state for every frame. - @brief Decoder state - */ -typedef struct OpusCustomDecoder OpusCustomDecoder; - -/** The mode contains all the information necessary to create an - encoder. Both the encoder and decoder need to be initialized - with exactly the same mode, otherwise the output will be - corrupted. - @brief Mode configuration - */ -typedef struct OpusCustomMode OpusCustomMode; - -/** Creates a new mode struct. This will be passed to an encoder or - * decoder. The mode MUST NOT BE DESTROYED until the encoders and - * decoders that use it are destroyed as well. - * @param [in] Fs int: Sampling rate (8000 to 96000 Hz) - * @param [in] frame_size int: Number of samples (per channel) to encode in each - * packet (64 - 1024, prime factorization must contain zero or more 2s, 3s, or 5s and no other primes) - * @param [out] error int*: Returned error code (if NULL, no error will be returned) - * @return A newly created mode - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error); - -/** Destroys a mode struct. Only call this after all encoders and - * decoders using this mode are destroyed as well. - * @param [in] mode OpusCustomMode*: Mode to be freed. - */ -OPUS_CUSTOM_EXPORT void opus_custom_mode_destroy(OpusCustomMode *mode); - - -#if !defined(OPUS_BUILD) || defined(CELT_ENCODER_C) - -/* Encoder */ -/** Gets the size of an OpusCustomEncoder structure. - * @param [in] mode OpusCustomMode *: Mode configuration - * @param [in] channels int: Number of channels - * @returns size - */ -OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size( - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1); - -# ifdef CUSTOM_MODES -/** Initializes a previously allocated encoder state - * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size. - * This is intended for applications which use their own allocator instead of malloc. - * @see opus_custom_encoder_create(),opus_custom_encoder_get_size() - * To reset a previously initialized state use the OPUS_RESET_STATE CTL. - * @param [in] st OpusCustomEncoder*: Encoder state - * @param [in] mode OpusCustomMode *: Contains all the information about the characteristics of - * the stream (must be the same characteristics as used for the - * decoder) - * @param [in] channels int: Number of channels - * @return OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT int opus_custom_encoder_init( - OpusCustomEncoder *st, - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); -# endif -#endif - - -/** Creates a new encoder state. Each stream needs its own encoder - * state (can't be shared across simultaneous streams). - * @param [in] mode OpusCustomMode*: Contains all the information about the characteristics of - * the stream (must be the same characteristics as used for the - * decoder) - * @param [in] channels int: Number of channels - * @param [out] error int*: Returns an error code - * @return Newly created encoder state. -*/ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encoder_create( - const OpusCustomMode *mode, - int channels, - int *error -) OPUS_ARG_NONNULL(1); - - -/** Destroys a an encoder state. - * @param[in] st OpusCustomEncoder*: State to be freed. - */ -OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st); - -/** Encodes a frame of audio. - * @param [in] st OpusCustomEncoder*: Encoder state - * @param [in] pcm float*: PCM audio in float format, with a normal range of +/-1.0. - * Samples with a range beyond +/-1.0 are supported but will - * be clipped by decoders using the integer API and should - * only be used if it is known that the far end supports - * extended dynamic range. There must be exactly - * frame_size samples per channel. - * @param [in] frame_size int: Number of samples per frame of input signal - * @param [out] compressed char *: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long. - * @param [in] maxCompressedBytes int: Maximum number of bytes to use for compressing the frame - * (can change from one frame to another) - * @return Number of bytes written to "compressed". - * If negative, an error has occurred (see error codes). It is IMPORTANT that - * the length returned be somehow transmitted to the decoder. Otherwise, no - * decoding is possible. - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode_float( - OpusCustomEncoder *st, - const float *pcm, - int frame_size, - unsigned char *compressed, - int maxCompressedBytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Encodes a frame of audio. - * @param [in] st OpusCustomEncoder*: Encoder state - * @param [in] pcm opus_int16*: PCM audio in signed 16-bit format (native endian). - * There must be exactly frame_size samples per channel. - * @param [in] frame_size int: Number of samples per frame of input signal - * @param [out] compressed char *: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long. - * @param [in] maxCompressedBytes int: Maximum number of bytes to use for compressing the frame - * (can change from one frame to another) - * @return Number of bytes written to "compressed". - * If negative, an error has occurred (see error codes). It is IMPORTANT that - * the length returned be somehow transmitted to the decoder. Otherwise, no - * decoding is possible. - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode( - OpusCustomEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *compressed, - int maxCompressedBytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on an Opus custom encoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. - * @see opus_encoderctls - */ -OPUS_CUSTOM_EXPORT int opus_custom_encoder_ctl(OpusCustomEncoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1); - - -#if !defined(OPUS_BUILD) || defined(CELT_DECODER_C) -/* Decoder */ - -/** Gets the size of an OpusCustomDecoder structure. - * @param [in] mode OpusCustomMode *: Mode configuration - * @param [in] channels int: Number of channels - * @returns size - */ -OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_decoder_get_size( - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1); - -/** Initializes a previously allocated decoder state - * The memory pointed to by st must be the size returned by opus_custom_decoder_get_size. - * This is intended for applications which use their own allocator instead of malloc. - * @see opus_custom_decoder_create(),opus_custom_decoder_get_size() - * To reset a previously initialized state use the OPUS_RESET_STATE CTL. - * @param [in] st OpusCustomDecoder*: Decoder state - * @param [in] mode OpusCustomMode *: Contains all the information about the characteristics of - * the stream (must be the same characteristics as used for the - * encoder) - * @param [in] channels int: Number of channels - * @return OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT_STATIC int opus_custom_decoder_init( - OpusCustomDecoder *st, - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - -#endif - - -/** Creates a new decoder state. Each stream needs its own decoder state (can't - * be shared across simultaneous streams). - * @param [in] mode OpusCustomMode: Contains all the information about the characteristics of the - * stream (must be the same characteristics as used for the encoder) - * @param [in] channels int: Number of channels - * @param [out] error int*: Returns an error code - * @return Newly created decoder state. - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decoder_create( - const OpusCustomMode *mode, - int channels, - int *error -) OPUS_ARG_NONNULL(1); - -/** Destroys a an decoder state. - * @param[in] st OpusCustomDecoder*: State to be freed. - */ -OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st); - -/** Decode an opus custom frame with floating point output - * @param [in] st OpusCustomDecoder*: Decoder state - * @param [in] data char*: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len int: Number of bytes in payload - * @param [out] pcm float*: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(float) - * @param [in] frame_size Number of samples per channel of available space in *pcm. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode_float( - OpusCustomDecoder *st, - const unsigned char *data, - int len, - float *pcm, - int frame_size -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Decode an opus custom frame - * @param [in] st OpusCustomDecoder*: Decoder state - * @param [in] data char*: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len int: Number of bytes in payload - * @param [out] pcm opus_int16*: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(opus_int16) - * @param [in] frame_size Number of samples per channel of available space in *pcm. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode( - OpusCustomDecoder *st, - const unsigned char *data, - int len, - opus_int16 *pcm, - int frame_size -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on an Opus custom decoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. - * @see opus_genericctls - */ -OPUS_CUSTOM_EXPORT int opus_custom_decoder_ctl(OpusCustomDecoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1); - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_CUSTOM_H */ diff --git a/thirdparty/opus/opus/opus_defines.h b/thirdparty/opus/opus/opus_defines.h deleted file mode 100644 index 315412dd1d..0000000000 --- a/thirdparty/opus/opus/opus_defines.h +++ /dev/null @@ -1,753 +0,0 @@ -/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - * @file opus_defines.h - * @brief Opus reference implementation constants - */ - -#ifndef OPUS_DEFINES_H -#define OPUS_DEFINES_H - -#include "opus_types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @defgroup opus_errorcodes Error codes - * @{ - */ -/** No error @hideinitializer*/ -#define OPUS_OK 0 -/** One or more invalid/out of range arguments @hideinitializer*/ -#define OPUS_BAD_ARG -1 -/** Not enough bytes allocated in the buffer @hideinitializer*/ -#define OPUS_BUFFER_TOO_SMALL -2 -/** An internal error was detected @hideinitializer*/ -#define OPUS_INTERNAL_ERROR -3 -/** The compressed data passed is corrupted @hideinitializer*/ -#define OPUS_INVALID_PACKET -4 -/** Invalid/unsupported request number @hideinitializer*/ -#define OPUS_UNIMPLEMENTED -5 -/** An encoder or decoder structure is invalid or already freed @hideinitializer*/ -#define OPUS_INVALID_STATE -6 -/** Memory allocation has failed @hideinitializer*/ -#define OPUS_ALLOC_FAIL -7 -/**@}*/ - -/** @cond OPUS_INTERNAL_DOC */ -/**Export control for opus functions */ - -#ifndef OPUS_EXPORT -# if defined(WIN32) -# if defined(OPUS_BUILD) && defined(DLL_EXPORT) -# define OPUS_EXPORT __declspec(dllexport) -# else -# define OPUS_EXPORT -# endif -# elif defined(__GNUC__) && defined(OPUS_BUILD) -# define OPUS_EXPORT __attribute__ ((visibility ("default"))) -# else -# define OPUS_EXPORT -# endif -#endif - -# if !defined(OPUS_GNUC_PREREQ) -# if defined(__GNUC__)&&defined(__GNUC_MINOR__) -# define OPUS_GNUC_PREREQ(_maj,_min) \ - ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) -# else -# define OPUS_GNUC_PREREQ(_maj,_min) 0 -# endif -# endif - -#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) -# if OPUS_GNUC_PREREQ(3,0) -# define OPUS_RESTRICT __restrict__ -# elif (defined(_MSC_VER) && _MSC_VER >= 1400) -# define OPUS_RESTRICT __restrict -# else -# define OPUS_RESTRICT -# endif -#else -# define OPUS_RESTRICT restrict -#endif - -#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) -# if OPUS_GNUC_PREREQ(2,7) -# define OPUS_INLINE __inline__ -# elif (defined(_MSC_VER)) -# define OPUS_INLINE __inline -# else -# define OPUS_INLINE -# endif -#else -# define OPUS_INLINE inline -#endif - -/**Warning attributes for opus functions - * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out - * some paranoid null checks. */ -#if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) -# define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__)) -#else -# define OPUS_WARN_UNUSED_RESULT -#endif -#if !defined(OPUS_BUILD) && defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) -# define OPUS_ARG_NONNULL(_x) __attribute__ ((__nonnull__(_x))) -#else -# define OPUS_ARG_NONNULL(_x) -#endif - -/** These are the actual Encoder CTL ID numbers. - * They should not be used directly by applications. - * In general, SETs should be even and GETs should be odd.*/ -#define OPUS_SET_APPLICATION_REQUEST 4000 -#define OPUS_GET_APPLICATION_REQUEST 4001 -#define OPUS_SET_BITRATE_REQUEST 4002 -#define OPUS_GET_BITRATE_REQUEST 4003 -#define OPUS_SET_MAX_BANDWIDTH_REQUEST 4004 -#define OPUS_GET_MAX_BANDWIDTH_REQUEST 4005 -#define OPUS_SET_VBR_REQUEST 4006 -#define OPUS_GET_VBR_REQUEST 4007 -#define OPUS_SET_BANDWIDTH_REQUEST 4008 -#define OPUS_GET_BANDWIDTH_REQUEST 4009 -#define OPUS_SET_COMPLEXITY_REQUEST 4010 -#define OPUS_GET_COMPLEXITY_REQUEST 4011 -#define OPUS_SET_INBAND_FEC_REQUEST 4012 -#define OPUS_GET_INBAND_FEC_REQUEST 4013 -#define OPUS_SET_PACKET_LOSS_PERC_REQUEST 4014 -#define OPUS_GET_PACKET_LOSS_PERC_REQUEST 4015 -#define OPUS_SET_DTX_REQUEST 4016 -#define OPUS_GET_DTX_REQUEST 4017 -#define OPUS_SET_VBR_CONSTRAINT_REQUEST 4020 -#define OPUS_GET_VBR_CONSTRAINT_REQUEST 4021 -#define OPUS_SET_FORCE_CHANNELS_REQUEST 4022 -#define OPUS_GET_FORCE_CHANNELS_REQUEST 4023 -#define OPUS_SET_SIGNAL_REQUEST 4024 -#define OPUS_GET_SIGNAL_REQUEST 4025 -#define OPUS_GET_LOOKAHEAD_REQUEST 4027 -/* #define OPUS_RESET_STATE 4028 */ -#define OPUS_GET_SAMPLE_RATE_REQUEST 4029 -#define OPUS_GET_FINAL_RANGE_REQUEST 4031 -#define OPUS_GET_PITCH_REQUEST 4033 -#define OPUS_SET_GAIN_REQUEST 4034 -#define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */ -#define OPUS_SET_LSB_DEPTH_REQUEST 4036 -#define OPUS_GET_LSB_DEPTH_REQUEST 4037 -#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039 -#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040 -#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041 -#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042 -#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043 - -/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ - -/* Macros to trigger compilation errors when the wrong types are provided to a CTL */ -#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) -#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr))) -#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr))) -#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr))) -/** @endcond */ - -/** @defgroup opus_ctlvalues Pre-defined values for CTL interface - * @see opus_genericctls, opus_encoderctls - * @{ - */ -/* Values for the various encoder CTLs */ -#define OPUS_AUTO -1000 /**opus_int32: Allowed values: 0-10, inclusive. - * - * @hideinitializer */ -#define OPUS_SET_COMPLEXITY(x) OPUS_SET_COMPLEXITY_REQUEST, __opus_check_int(x) -/** Gets the encoder's complexity configuration. - * @see OPUS_SET_COMPLEXITY - * @param[out] x opus_int32 *: Returns a value in the range 0-10, - * inclusive. - * @hideinitializer */ -#define OPUS_GET_COMPLEXITY(x) OPUS_GET_COMPLEXITY_REQUEST, __opus_check_int_ptr(x) - -/** Configures the bitrate in the encoder. - * Rates from 500 to 512000 bits per second are meaningful, as well as the - * special values #OPUS_AUTO and #OPUS_BITRATE_MAX. - * The value #OPUS_BITRATE_MAX can be used to cause the codec to use as much - * rate as it can, which is useful for controlling the rate by adjusting the - * output buffer size. - * @see OPUS_GET_BITRATE - * @param[in] x opus_int32: Bitrate in bits per second. The default - * is determined based on the number of - * channels and the input sampling rate. - * @hideinitializer */ -#define OPUS_SET_BITRATE(x) OPUS_SET_BITRATE_REQUEST, __opus_check_int(x) -/** Gets the encoder's bitrate configuration. - * @see OPUS_SET_BITRATE - * @param[out] x opus_int32 *: Returns the bitrate in bits per second. - * The default is determined based on the - * number of channels and the input - * sampling rate. - * @hideinitializer */ -#define OPUS_GET_BITRATE(x) OPUS_GET_BITRATE_REQUEST, __opus_check_int_ptr(x) - -/** Enables or disables variable bitrate (VBR) in the encoder. - * The configured bitrate may not be met exactly because frames must - * be an integer number of bytes in length. - * @see OPUS_GET_VBR - * @see OPUS_SET_VBR_CONSTRAINT - * @param[in] x opus_int32: Allowed values: - *
    - *
    0
    Hard CBR. For LPC/hybrid modes at very low bit-rate, this can - * cause noticeable quality degradation.
    - *
    1
    VBR (default). The exact type of VBR is controlled by - * #OPUS_SET_VBR_CONSTRAINT.
    - *
    - * @hideinitializer */ -#define OPUS_SET_VBR(x) OPUS_SET_VBR_REQUEST, __opus_check_int(x) -/** Determine if variable bitrate (VBR) is enabled in the encoder. - * @see OPUS_SET_VBR - * @see OPUS_GET_VBR_CONSTRAINT - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    0
    Hard CBR.
    - *
    1
    VBR (default). The exact type of VBR may be retrieved via - * #OPUS_GET_VBR_CONSTRAINT.
    - *
    - * @hideinitializer */ -#define OPUS_GET_VBR(x) OPUS_GET_VBR_REQUEST, __opus_check_int_ptr(x) - -/** Enables or disables constrained VBR in the encoder. - * This setting is ignored when the encoder is in CBR mode. - * @warning Only the MDCT mode of Opus currently heeds the constraint. - * Speech mode ignores it completely, hybrid mode may fail to obey it - * if the LPC layer uses more bitrate than the constraint would have - * permitted. - * @see OPUS_GET_VBR_CONSTRAINT - * @see OPUS_SET_VBR - * @param[in] x opus_int32: Allowed values: - *
    - *
    0
    Unconstrained VBR.
    - *
    1
    Constrained VBR (default). This creates a maximum of one - * frame of buffering delay assuming a transport with a - * serialization speed of the nominal bitrate.
    - *
    - * @hideinitializer */ -#define OPUS_SET_VBR_CONSTRAINT(x) OPUS_SET_VBR_CONSTRAINT_REQUEST, __opus_check_int(x) -/** Determine if constrained VBR is enabled in the encoder. - * @see OPUS_SET_VBR_CONSTRAINT - * @see OPUS_GET_VBR - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    0
    Unconstrained VBR.
    - *
    1
    Constrained VBR (default).
    - *
    - * @hideinitializer */ -#define OPUS_GET_VBR_CONSTRAINT(x) OPUS_GET_VBR_CONSTRAINT_REQUEST, __opus_check_int_ptr(x) - -/** Configures mono/stereo forcing in the encoder. - * This can force the encoder to produce packets encoded as either mono or - * stereo, regardless of the format of the input audio. This is useful when - * the caller knows that the input signal is currently a mono source embedded - * in a stereo stream. - * @see OPUS_GET_FORCE_CHANNELS - * @param[in] x opus_int32: Allowed values: - *
    - *
    #OPUS_AUTO
    Not forced (default)
    - *
    1
    Forced mono
    - *
    2
    Forced stereo
    - *
    - * @hideinitializer */ -#define OPUS_SET_FORCE_CHANNELS(x) OPUS_SET_FORCE_CHANNELS_REQUEST, __opus_check_int(x) -/** Gets the encoder's forced channel configuration. - * @see OPUS_SET_FORCE_CHANNELS - * @param[out] x opus_int32 *: - *
    - *
    #OPUS_AUTO
    Not forced (default)
    - *
    1
    Forced mono
    - *
    2
    Forced stereo
    - *
    - * @hideinitializer */ -#define OPUS_GET_FORCE_CHANNELS(x) OPUS_GET_FORCE_CHANNELS_REQUEST, __opus_check_int_ptr(x) - -/** Configures the maximum bandpass that the encoder will select automatically. - * Applications should normally use this instead of #OPUS_SET_BANDWIDTH - * (leaving that set to the default, #OPUS_AUTO). This allows the - * application to set an upper bound based on the type of input it is - * providing, but still gives the encoder the freedom to reduce the bandpass - * when the bitrate becomes too low, for better overall quality. - * @see OPUS_GET_MAX_BANDWIDTH - * @param[in] x opus_int32: Allowed values: - *
    - *
    OPUS_BANDWIDTH_NARROWBAND
    4 kHz passband
    - *
    OPUS_BANDWIDTH_MEDIUMBAND
    6 kHz passband
    - *
    OPUS_BANDWIDTH_WIDEBAND
    8 kHz passband
    - *
    OPUS_BANDWIDTH_SUPERWIDEBAND
    12 kHz passband
    - *
    OPUS_BANDWIDTH_FULLBAND
    20 kHz passband (default)
    - *
    - * @hideinitializer */ -#define OPUS_SET_MAX_BANDWIDTH(x) OPUS_SET_MAX_BANDWIDTH_REQUEST, __opus_check_int(x) - -/** Gets the encoder's configured maximum allowed bandpass. - * @see OPUS_SET_MAX_BANDWIDTH - * @param[out] x opus_int32 *: Allowed values: - *
    - *
    #OPUS_BANDWIDTH_NARROWBAND
    4 kHz passband
    - *
    #OPUS_BANDWIDTH_MEDIUMBAND
    6 kHz passband
    - *
    #OPUS_BANDWIDTH_WIDEBAND
    8 kHz passband
    - *
    #OPUS_BANDWIDTH_SUPERWIDEBAND
    12 kHz passband
    - *
    #OPUS_BANDWIDTH_FULLBAND
    20 kHz passband (default)
    - *
    - * @hideinitializer */ -#define OPUS_GET_MAX_BANDWIDTH(x) OPUS_GET_MAX_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) - -/** Sets the encoder's bandpass to a specific value. - * This prevents the encoder from automatically selecting the bandpass based - * on the available bitrate. If an application knows the bandpass of the input - * audio it is providing, it should normally use #OPUS_SET_MAX_BANDWIDTH - * instead, which still gives the encoder the freedom to reduce the bandpass - * when the bitrate becomes too low, for better overall quality. - * @see OPUS_GET_BANDWIDTH - * @param[in] x opus_int32: Allowed values: - *
    - *
    #OPUS_AUTO
    (default)
    - *
    #OPUS_BANDWIDTH_NARROWBAND
    4 kHz passband
    - *
    #OPUS_BANDWIDTH_MEDIUMBAND
    6 kHz passband
    - *
    #OPUS_BANDWIDTH_WIDEBAND
    8 kHz passband
    - *
    #OPUS_BANDWIDTH_SUPERWIDEBAND
    12 kHz passband
    - *
    #OPUS_BANDWIDTH_FULLBAND
    20 kHz passband
    - *
    - * @hideinitializer */ -#define OPUS_SET_BANDWIDTH(x) OPUS_SET_BANDWIDTH_REQUEST, __opus_check_int(x) - -/** Configures the type of signal being encoded. - * This is a hint which helps the encoder's mode selection. - * @see OPUS_GET_SIGNAL - * @param[in] x opus_int32: Allowed values: - *
    - *
    #OPUS_AUTO
    (default)
    - *
    #OPUS_SIGNAL_VOICE
    Bias thresholds towards choosing LPC or Hybrid modes.
    - *
    #OPUS_SIGNAL_MUSIC
    Bias thresholds towards choosing MDCT modes.
    - *
    - * @hideinitializer */ -#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured signal type. - * @see OPUS_SET_SIGNAL - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    #OPUS_AUTO
    (default)
    - *
    #OPUS_SIGNAL_VOICE
    Bias thresholds towards choosing LPC or Hybrid modes.
    - *
    #OPUS_SIGNAL_MUSIC
    Bias thresholds towards choosing MDCT modes.
    - *
    - * @hideinitializer */ -#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __opus_check_int_ptr(x) - - -/** Configures the encoder's intended application. - * The initial value is a mandatory argument to the encoder_create function. - * @see OPUS_GET_APPLICATION - * @param[in] x opus_int32: Returns one of the following values: - *
    - *
    #OPUS_APPLICATION_VOIP
    - *
    Process signal for improved speech intelligibility.
    - *
    #OPUS_APPLICATION_AUDIO
    - *
    Favor faithfulness to the original input.
    - *
    #OPUS_APPLICATION_RESTRICTED_LOWDELAY
    - *
    Configure the minimum possible coding delay by disabling certain modes - * of operation.
    - *
    - * @hideinitializer */ -#define OPUS_SET_APPLICATION(x) OPUS_SET_APPLICATION_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured application. - * @see OPUS_SET_APPLICATION - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    #OPUS_APPLICATION_VOIP
    - *
    Process signal for improved speech intelligibility.
    - *
    #OPUS_APPLICATION_AUDIO
    - *
    Favor faithfulness to the original input.
    - *
    #OPUS_APPLICATION_RESTRICTED_LOWDELAY
    - *
    Configure the minimum possible coding delay by disabling certain modes - * of operation.
    - *
    - * @hideinitializer */ -#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x) - -/** Gets the total samples of delay added by the entire codec. - * This can be queried by the encoder and then the provided number of samples can be - * skipped on from the start of the decoder's output to provide time aligned input - * and output. From the perspective of a decoding application the real data begins this many - * samples late. - * - * The decoder contribution to this delay is identical for all decoders, but the - * encoder portion of the delay may vary from implementation to implementation, - * version to version, or even depend on the encoder's initial configuration. - * Applications needing delay compensation should call this CTL rather than - * hard-coding a value. - * @param[out] x opus_int32 *: Number of lookahead samples - * @hideinitializer */ -#define OPUS_GET_LOOKAHEAD(x) OPUS_GET_LOOKAHEAD_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's use of inband forward error correction (FEC). - * @note This is only applicable to the LPC layer - * @see OPUS_GET_INBAND_FEC - * @param[in] x opus_int32: Allowed values: - *
    - *
    0
    Disable inband FEC (default).
    - *
    1
    Enable inband FEC.
    - *
    - * @hideinitializer */ -#define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x) -/** Gets encoder's configured use of inband forward error correction. - * @see OPUS_SET_INBAND_FEC - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    0
    Inband FEC disabled (default).
    - *
    1
    Inband FEC enabled.
    - *
    - * @hideinitializer */ -#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's expected packet loss percentage. - * Higher values trigger progressively more loss resistant behavior in the encoder - * at the expense of quality at a given bitrate in the absence of packet loss, but - * greater quality under loss. - * @see OPUS_GET_PACKET_LOSS_PERC - * @param[in] x opus_int32: Loss percentage in the range 0-100, inclusive (default: 0). - * @hideinitializer */ -#define OPUS_SET_PACKET_LOSS_PERC(x) OPUS_SET_PACKET_LOSS_PERC_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured packet loss percentage. - * @see OPUS_SET_PACKET_LOSS_PERC - * @param[out] x opus_int32 *: Returns the configured loss percentage - * in the range 0-100, inclusive (default: 0). - * @hideinitializer */ -#define OPUS_GET_PACKET_LOSS_PERC(x) OPUS_GET_PACKET_LOSS_PERC_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's use of discontinuous transmission (DTX). - * @note This is only applicable to the LPC layer - * @see OPUS_GET_DTX - * @param[in] x opus_int32: Allowed values: - *
    - *
    0
    Disable DTX (default).
    - *
    1
    Enabled DTX.
    - *
    - * @hideinitializer */ -#define OPUS_SET_DTX(x) OPUS_SET_DTX_REQUEST, __opus_check_int(x) -/** Gets encoder's configured use of discontinuous transmission. - * @see OPUS_SET_DTX - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    0
    DTX disabled (default).
    - *
    1
    DTX enabled.
    - *
    - * @hideinitializer */ -#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x) -/** Configures the depth of signal being encoded. - * - * This is a hint which helps the encoder identify silence and near-silence. - * It represents the number of significant bits of linear intensity below - * which the signal contains ignorable quantization or other noise. - * - * For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting - * for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate - * for 16-bit linear pcm input with opus_encode_float(). - * - * When using opus_encode() instead of opus_encode_float(), or when libopus - * is compiled for fixed-point, the encoder uses the minimum of the value - * set here and the value 16. - * - * @see OPUS_GET_LSB_DEPTH - * @param[in] x opus_int32: Input precision in bits, between 8 and 24 - * (default: 24). - * @hideinitializer */ -#define OPUS_SET_LSB_DEPTH(x) OPUS_SET_LSB_DEPTH_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured signal depth. - * @see OPUS_SET_LSB_DEPTH - * @param[out] x opus_int32 *: Input precision in bits, between 8 and - * 24 (default: 24). - * @hideinitializer */ -#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's use of variable duration frames. - * When variable duration is enabled, the encoder is free to use a shorter frame - * size than the one requested in the opus_encode*() call. - * It is then the user's responsibility - * to verify how much audio was encoded by checking the ToC byte of the encoded - * packet. The part of the audio that was not encoded needs to be resent to the - * encoder for the next call. Do not use this option unless you really - * know what you are doing. - * @see OPUS_GET_EXPERT_FRAME_DURATION - * @param[in] x opus_int32: Allowed values: - *
    - *
    OPUS_FRAMESIZE_ARG
    Select frame size from the argument (default).
    - *
    OPUS_FRAMESIZE_2_5_MS
    Use 2.5 ms frames.
    - *
    OPUS_FRAMESIZE_5_MS
    Use 5 ms frames.
    - *
    OPUS_FRAMESIZE_10_MS
    Use 10 ms frames.
    - *
    OPUS_FRAMESIZE_20_MS
    Use 20 ms frames.
    - *
    OPUS_FRAMESIZE_40_MS
    Use 40 ms frames.
    - *
    OPUS_FRAMESIZE_60_MS
    Use 60 ms frames.
    - *
    OPUS_FRAMESIZE_VARIABLE
    Optimize the frame size dynamically.
    - *
    - * @hideinitializer */ -#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured use of variable duration frames. - * @see OPUS_SET_EXPERT_FRAME_DURATION - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    OPUS_FRAMESIZE_ARG
    Select frame size from the argument (default).
    - *
    OPUS_FRAMESIZE_2_5_MS
    Use 2.5 ms frames.
    - *
    OPUS_FRAMESIZE_5_MS
    Use 5 ms frames.
    - *
    OPUS_FRAMESIZE_10_MS
    Use 10 ms frames.
    - *
    OPUS_FRAMESIZE_20_MS
    Use 20 ms frames.
    - *
    OPUS_FRAMESIZE_40_MS
    Use 40 ms frames.
    - *
    OPUS_FRAMESIZE_60_MS
    Use 60 ms frames.
    - *
    OPUS_FRAMESIZE_VARIABLE
    Optimize the frame size dynamically.
    - *
    - * @hideinitializer */ -#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) - -/** If set to 1, disables almost all use of prediction, making frames almost - * completely independent. This reduces quality. - * @see OPUS_GET_PREDICTION_DISABLED - * @param[in] x opus_int32: Allowed values: - *
    - *
    0
    Enable prediction (default).
    - *
    1
    Disable prediction.
    - *
    - * @hideinitializer */ -#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured prediction status. - * @see OPUS_SET_PREDICTION_DISABLED - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    0
    Prediction enabled (default).
    - *
    1
    Prediction disabled.
    - *
    - * @hideinitializer */ -#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x) - -/**@}*/ - -/** @defgroup opus_genericctls Generic CTLs - * - * These macros are used with the \c opus_decoder_ctl and - * \c opus_encoder_ctl calls to generate a particular - * request. - * - * When called on an \c OpusDecoder they apply to that - * particular decoder instance. When called on an - * \c OpusEncoder they apply to the corresponding setting - * on that encoder instance, if present. - * - * Some usage examples: - * - * @code - * int ret; - * opus_int32 pitch; - * ret = opus_decoder_ctl(dec_ctx, OPUS_GET_PITCH(&pitch)); - * if (ret == OPUS_OK) return ret; - * - * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE); - * opus_decoder_ctl(dec_ctx, OPUS_RESET_STATE); - * - * opus_int32 enc_bw, dec_bw; - * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&enc_bw)); - * opus_decoder_ctl(dec_ctx, OPUS_GET_BANDWIDTH(&dec_bw)); - * if (enc_bw != dec_bw) { - * printf("packet bandwidth mismatch!\n"); - * } - * @endcode - * - * @see opus_encoder, opus_decoder_ctl, opus_encoder_ctl, opus_decoderctls, opus_encoderctls - * @{ - */ - -/** Resets the codec state to be equivalent to a freshly initialized state. - * This should be called when switching streams in order to prevent - * the back to back decoding from giving different results from - * one at a time decoding. - * @hideinitializer */ -#define OPUS_RESET_STATE 4028 - -/** Gets the final state of the codec's entropy coder. - * This is used for testing purposes, - * The encoder and decoder state should be identical after coding a payload - * (assuming no data corruption or software bugs) - * - * @param[out] x opus_uint32 *: Entropy coder state - * - * @hideinitializer */ -#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) - -/** Gets the encoder's configured bandpass or the decoder's last bandpass. - * @see OPUS_SET_BANDWIDTH - * @param[out] x opus_int32 *: Returns one of the following values: - *
    - *
    #OPUS_AUTO
    (default)
    - *
    #OPUS_BANDWIDTH_NARROWBAND
    4 kHz passband
    - *
    #OPUS_BANDWIDTH_MEDIUMBAND
    6 kHz passband
    - *
    #OPUS_BANDWIDTH_WIDEBAND
    8 kHz passband
    - *
    #OPUS_BANDWIDTH_SUPERWIDEBAND
    12 kHz passband
    - *
    #OPUS_BANDWIDTH_FULLBAND
    20 kHz passband
    - *
    - * @hideinitializer */ -#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) - -/** Gets the sampling rate the encoder or decoder was initialized with. - * This simply returns the Fs value passed to opus_encoder_init() - * or opus_decoder_init(). - * @param[out] x opus_int32 *: Sampling rate of encoder or decoder. - * @hideinitializer - */ -#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) - -/**@}*/ - -/** @defgroup opus_decoderctls Decoder related CTLs - * @see opus_genericctls, opus_encoderctls, opus_decoder - * @{ - */ - -/** Configures decoder gain adjustment. - * Scales the decoded output by a factor specified in Q8 dB units. - * This has a maximum range of -32768 to 32767 inclusive, and returns - * OPUS_BAD_ARG otherwise. The default is zero indicating no adjustment. - * This setting survives decoder reset. - * - * gain = pow(10, x/(20.0*256)) - * - * @param[in] x opus_int32: Amount to scale PCM signal by in Q8 dB units. - * @hideinitializer */ -#define OPUS_SET_GAIN(x) OPUS_SET_GAIN_REQUEST, __opus_check_int(x) -/** Gets the decoder's configured gain adjustment. @see OPUS_SET_GAIN - * - * @param[out] x opus_int32 *: Amount to scale PCM signal by in Q8 dB units. - * @hideinitializer */ -#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) - -/** Gets the duration (in samples) of the last packet successfully decoded or concealed. - * @param[out] x opus_int32 *: Number of samples (at current sampling rate). - * @hideinitializer */ -#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) - -/** Gets the pitch of the last decoded frame, if available. - * This can be used for any post-processing algorithm requiring the use of pitch, - * e.g. time stretching/shortening. If the last frame was not voiced, or if the - * pitch was not coded in the frame, then zero is returned. - * - * This CTL is only implemented for decoder instances. - * - * @param[out] x opus_int32 *: pitch period at 48 kHz (or 0 if not available) - * - * @hideinitializer */ -#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) - -/**@}*/ - -/** @defgroup opus_libinfo Opus library information functions - * @{ - */ - -/** Converts an opus error code into a human readable string. - * - * @param[in] error int: Error number - * @returns Error string - */ -OPUS_EXPORT const char *opus_strerror(int error); - -/** Gets the libopus version string. - * - * Applications may look for the substring "-fixed" in the version string to - * determine whether they have a fixed-point or floating-point build at - * runtime. - * - * @returns Version string - */ -OPUS_EXPORT const char *opus_get_version_string(void); -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_DEFINES_H */ diff --git a/thirdparty/opus/opus/opus_multistream.h b/thirdparty/opus/opus/opus_multistream.h deleted file mode 100644 index 3622e009fb..0000000000 --- a/thirdparty/opus/opus/opus_multistream.h +++ /dev/null @@ -1,660 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - * @file opus_multistream.h - * @brief Opus reference implementation multistream API - */ - -#ifndef OPUS_MULTISTREAM_H -#define OPUS_MULTISTREAM_H - -#include "opus.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @cond OPUS_INTERNAL_DOC */ - -/** Macros to trigger compilation errors when the wrong types are provided to a - * CTL. */ -/**@{*/ -#define __opus_check_encstate_ptr(ptr) ((ptr) + ((ptr) - (OpusEncoder**)(ptr))) -#define __opus_check_decstate_ptr(ptr) ((ptr) + ((ptr) - (OpusDecoder**)(ptr))) -/**@}*/ - -/** These are the actual encoder and decoder CTL ID numbers. - * They should not be used directly by applications. - * In general, SETs should be even and GETs should be odd.*/ -/**@{*/ -#define OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST 5120 -#define OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST 5122 -/**@}*/ - -/** @endcond */ - -/** @defgroup opus_multistream_ctls Multistream specific encoder and decoder CTLs - * - * These are convenience macros that are specific to the - * opus_multistream_encoder_ctl() and opus_multistream_decoder_ctl() - * interface. - * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, and - * @ref opus_decoderctls may be applied to a multistream encoder or decoder as - * well. - * In addition, you may retrieve the encoder or decoder state for an specific - * stream via #OPUS_MULTISTREAM_GET_ENCODER_STATE or - * #OPUS_MULTISTREAM_GET_DECODER_STATE and apply CTLs to it individually. - */ -/**@{*/ - -/** Gets the encoder state for an individual stream of a multistream encoder. - * @param[in] x opus_int32: The index of the stream whose encoder you - * wish to retrieve. - * This must be non-negative and less than - * the streams parameter used - * to initialize the encoder. - * @param[out] y OpusEncoder**: Returns a pointer to the given - * encoder state. - * @retval OPUS_BAD_ARG The index of the requested stream was out of range. - * @hideinitializer - */ -#define OPUS_MULTISTREAM_GET_ENCODER_STATE(x,y) OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST, __opus_check_int(x), __opus_check_encstate_ptr(y) - -/** Gets the decoder state for an individual stream of a multistream decoder. - * @param[in] x opus_int32: The index of the stream whose decoder you - * wish to retrieve. - * This must be non-negative and less than - * the streams parameter used - * to initialize the decoder. - * @param[out] y OpusDecoder**: Returns a pointer to the given - * decoder state. - * @retval OPUS_BAD_ARG The index of the requested stream was out of range. - * @hideinitializer - */ -#define OPUS_MULTISTREAM_GET_DECODER_STATE(x,y) OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST, __opus_check_int(x), __opus_check_decstate_ptr(y) - -/**@}*/ - -/** @defgroup opus_multistream Opus Multistream API - * @{ - * - * The multistream API allows individual Opus streams to be combined into a - * single packet, enabling support for up to 255 channels. Unlike an - * elementary Opus stream, the encoder and decoder must negotiate the channel - * configuration before the decoder can successfully interpret the data in the - * packets produced by the encoder. Some basic information, such as packet - * duration, can be computed without any special negotiation. - * - * The format for multistream Opus packets is defined in - * RFC 7845 - * and is based on the self-delimited Opus framing described in Appendix B of - * RFC 6716. - * Normal Opus packets are just a degenerate case of multistream Opus packets, - * and can be encoded or decoded with the multistream API by setting - * streams to 1 when initializing the encoder or - * decoder. - * - * Multistream Opus streams can contain up to 255 elementary Opus streams. - * These may be either "uncoupled" or "coupled", indicating that the decoder - * is configured to decode them to either 1 or 2 channels, respectively. - * The streams are ordered so that all coupled streams appear at the - * beginning. - * - * A mapping table defines which decoded channel i - * should be used for each input/output (I/O) channel j. This table is - * typically provided as an unsigned char array. - * Let i = mapping[j] be the index for I/O channel j. - * If i < 2*coupled_streams, then I/O channel j is - * encoded as the left channel of stream (i/2) if i - * is even, or as the right channel of stream (i/2) if - * i is odd. Otherwise, I/O channel j is encoded as - * mono in stream (i - coupled_streams), unless it has the special - * value 255, in which case it is omitted from the encoding entirely (the - * decoder will reproduce it as silence). Each value i must either - * be the special value 255 or be less than streams + coupled_streams. - * - * The output channels specified by the encoder - * should use the - * Vorbis - * channel ordering. A decoder may wish to apply an additional permutation - * to the mapping the encoder used to achieve a different output channel - * order (e.g. for outputing in WAV order). - * - * Each multistream packet contains an Opus packet for each stream, and all of - * the Opus packets in a single multistream packet must have the same - * duration. Therefore the duration of a multistream packet can be extracted - * from the TOC sequence of the first stream, which is located at the - * beginning of the packet, just like an elementary Opus stream: - * - * @code - * int nb_samples; - * int nb_frames; - * nb_frames = opus_packet_get_nb_frames(data, len); - * if (nb_frames < 1) - * return nb_frames; - * nb_samples = opus_packet_get_samples_per_frame(data, 48000) * nb_frames; - * @endcode - * - * The general encoding and decoding process proceeds exactly the same as in - * the normal @ref opus_encoder and @ref opus_decoder APIs. - * See their documentation for an overview of how to use the corresponding - * multistream functions. - */ - -/** Opus multistream encoder state. - * This contains the complete state of a multistream Opus encoder. - * It is position independent and can be freely copied. - * @see opus_multistream_encoder_create - * @see opus_multistream_encoder_init - */ -typedef struct OpusMSEncoder OpusMSEncoder; - -/** Opus multistream decoder state. - * This contains the complete state of a multistream Opus decoder. - * It is position independent and can be freely copied. - * @see opus_multistream_decoder_create - * @see opus_multistream_decoder_init - */ -typedef struct OpusMSDecoder OpusMSDecoder; - -/**\name Multistream encoder functions */ -/**@{*/ - -/** Gets the size of an OpusMSEncoder structure. - * @param streams int: The total number of streams to encode from the - * input. - * This must be no more than 255. - * @param coupled_streams int: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (streams + - * coupled_streams) must be no - * more than 255. - * @returns The size in bytes on success, or a negative error code - * (see @ref opus_errorcodes) on error. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size( - int streams, - int coupled_streams -); - -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size( - int channels, - int mapping_family -); - - -/** Allocates and initializes a multistream encoder state. - * Call opus_multistream_encoder_destroy() to release - * this object when finished. - * @param Fs opus_int32: Sampling rate of the input signal (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels int: Number of channels in the input signal. - * This must be at most 255. - * It may be greater than the number of - * coded channels (streams + - * coupled_streams). - * @param streams int: The total number of streams to encode from the - * input. - * This must be no more than the number of channels. - * @param coupled_streams int: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (streams + - * coupled_streams) must be no - * more than the number of input channels. - * @param[in] mapping const unsigned char[channels]: Mapping from - * encoded channels to input channels, as described in - * @ref opus_multistream. As an extra constraint, the - * multistream encoder does not allow encoding coupled - * streams for which one channel is unused since this - * is never a good idea. - * @param application int: The target encoder application. - * This must be one of the following: - *
    - *
    #OPUS_APPLICATION_VOIP
    - *
    Process signal for improved speech intelligibility.
    - *
    #OPUS_APPLICATION_AUDIO
    - *
    Favor faithfulness to the original input.
    - *
    #OPUS_APPLICATION_RESTRICTED_LOWDELAY
    - *
    Configure the minimum possible coding delay by disabling certain modes - * of operation.
    - *
    - * @param[out] error int *: Returns #OPUS_OK on success, or an error - * code (see @ref opus_errorcodes) on - * failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application, - int *error -) OPUS_ARG_NONNULL(5); - -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create( - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application, - int *error -) OPUS_ARG_NONNULL(5); - -/** Initialize a previously allocated multistream encoder state. - * The memory pointed to by \a st must be at least the size returned by - * opus_multistream_encoder_get_size(). - * This is intended for applications which use their own allocator instead of - * malloc. - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @see opus_multistream_encoder_create - * @see opus_multistream_encoder_get_size - * @param st OpusMSEncoder*: Multistream encoder state to initialize. - * @param Fs opus_int32: Sampling rate of the input signal (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels int: Number of channels in the input signal. - * This must be at most 255. - * It may be greater than the number of - * coded channels (streams + - * coupled_streams). - * @param streams int: The total number of streams to encode from the - * input. - * This must be no more than the number of channels. - * @param coupled_streams int: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (streams + - * coupled_streams) must be no - * more than the number of input channels. - * @param[in] mapping const unsigned char[channels]: Mapping from - * encoded channels to input channels, as described in - * @ref opus_multistream. As an extra constraint, the - * multistream encoder does not allow encoding coupled - * streams for which one channel is unused since this - * is never a good idea. - * @param application int: The target encoder application. - * This must be one of the following: - *
    - *
    #OPUS_APPLICATION_VOIP
    - *
    Process signal for improved speech intelligibility.
    - *
    #OPUS_APPLICATION_AUDIO
    - *
    Favor faithfulness to the original input.
    - *
    #OPUS_APPLICATION_RESTRICTED_LOWDELAY
    - *
    Configure the minimum possible coding delay by disabling certain modes - * of operation.
    - *
    - * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) - * on failure. - */ -OPUS_EXPORT int opus_multistream_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - -OPUS_EXPORT int opus_multistream_surround_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - -/** Encodes a multistream Opus frame. - * @param st OpusMSEncoder*: Multistream encoder state. - * @param[in] pcm const opus_int16*: The input signal as interleaved - * samples. - * This must contain - * frame_size*channels - * samples. - * @param frame_size int: Number of samples per channel in the input - * signal. - * This must be an Opus frame size for the - * encoder's sampling rate. - * For example, at 48 kHz the permitted values - * are 120, 240, 480, 960, 1920, and 2880. - * Passing in a duration of less than 10 ms - * (480 samples at 48 kHz) will prevent the - * encoder from using the LPC or hybrid modes. - * @param[out] data unsigned char*: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes opus_int32: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode( - OpusMSEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Encodes a multistream Opus frame from floating point input. - * @param st OpusMSEncoder*: Multistream encoder state. - * @param[in] pcm const float*: The input signal as interleaved - * samples with a normal range of - * +/-1.0. - * Samples with a range beyond +/-1.0 - * are supported but will be clipped by - * decoders using the integer API and - * should only be used if it is known - * that the far end supports extended - * dynamic range. - * This must contain - * frame_size*channels - * samples. - * @param frame_size int: Number of samples per channel in the input - * signal. - * This must be an Opus frame size for the - * encoder's sampling rate. - * For example, at 48 kHz the permitted values - * are 120, 240, 480, 960, 1920, and 2880. - * Passing in a duration of less than 10 ms - * (480 samples at 48 kHz) will prevent the - * encoder from using the LPC or hybrid modes. - * @param[out] data unsigned char*: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes opus_int32: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode_float( - OpusMSEncoder *st, - const float *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Frees an OpusMSEncoder allocated by - * opus_multistream_encoder_create(). - * @param st OpusMSEncoder*: Multistream encoder state to be freed. - */ -OPUS_EXPORT void opus_multistream_encoder_destroy(OpusMSEncoder *st); - -/** Perform a CTL function on a multistream Opus encoder. - * - * Generally the request and subsequent arguments are generated by a - * convenience macro. - * @param st OpusMSEncoder*: Multistream encoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls, - * @ref opus_encoderctls, or @ref opus_multistream_ctls. - * @see opus_genericctls - * @see opus_encoderctls - * @see opus_multistream_ctls - */ -OPUS_EXPORT int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); - -/**@}*/ - -/**\name Multistream decoder functions */ -/**@{*/ - -/** Gets the size of an OpusMSDecoder structure. - * @param streams int: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams int: Number streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (streams + - * coupled_streams) must be no - * more than 255. - * @returns The size in bytes on success, or a negative error code - * (see @ref opus_errorcodes) on error. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_decoder_get_size( - int streams, - int coupled_streams -); - -/** Allocates and initializes a multistream decoder state. - * Call opus_multistream_decoder_destroy() to release - * this object when finished. - * @param Fs opus_int32: Sampling rate to decode at (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels int: Number of channels to output. - * This must be at most 255. - * It may be different from the number of coded - * channels (streams + - * coupled_streams). - * @param streams int: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams int: Number of streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (streams + - * coupled_streams) must be no - * more than 255. - * @param[in] mapping const unsigned char[channels]: Mapping from - * coded channels to output channels, as described in - * @ref opus_multistream. - * @param[out] error int *: Returns #OPUS_OK on success, or an error - * code (see @ref opus_errorcodes) on - * failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSDecoder *opus_multistream_decoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int *error -) OPUS_ARG_NONNULL(5); - -/** Intialize a previously allocated decoder state object. - * The memory pointed to by \a st must be at least the size returned by - * opus_multistream_encoder_get_size(). - * This is intended for applications which use their own allocator instead of - * malloc. - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @see opus_multistream_decoder_create - * @see opus_multistream_deocder_get_size - * @param st OpusMSEncoder*: Multistream encoder state to initialize. - * @param Fs opus_int32: Sampling rate to decode at (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels int: Number of channels to output. - * This must be at most 255. - * It may be different from the number of coded - * channels (streams + - * coupled_streams). - * @param streams int: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams int: Number of streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (streams + - * coupled_streams) must be no - * more than 255. - * @param[in] mapping const unsigned char[channels]: Mapping from - * coded channels to output channels, as described in - * @ref opus_multistream. - * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) - * on failure. - */ -OPUS_EXPORT int opus_multistream_decoder_init( - OpusMSDecoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - -/** Decode a multistream Opus packet. - * @param st OpusMSDecoder*: Multistream decoder state. - * @param[in] data const unsigned char*: Input payload. - * Use a NULL - * pointer to indicate packet - * loss. - * @param len opus_int32: Number of bytes in payload. - * @param[out] pcm opus_int16*: Output signal, with interleaved - * samples. - * This must contain room for - * frame_size*channels - * samples. - * @param frame_size int: The number of samples per channel of - * available space in \a pcm. - * If this is less than the maximum packet duration - * (120 ms; 5760 for 48kHz), this function will not be capable - * of decoding some packets. In the case of PLC (data==NULL) - * or FEC (decode_fec=1), then frame_size needs to be exactly - * the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the - * next incoming packet. For the PLC and FEC cases, frame_size - * must be a multiple of 2.5 ms. - * @param decode_fec int: Flag (0 or 1) to request that any in-band - * forward error correction data be decoded. - * If no such data is available, the frame is - * decoded as if it were lost. - * @returns Number of samples decoded on success or a negative error code - * (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - opus_int16 *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Decode a multistream Opus packet with floating point output. - * @param st OpusMSDecoder*: Multistream decoder state. - * @param[in] data const unsigned char*: Input payload. - * Use a NULL - * pointer to indicate packet - * loss. - * @param len opus_int32: Number of bytes in payload. - * @param[out] pcm opus_int16*: Output signal, with interleaved - * samples. - * This must contain room for - * frame_size*channels - * samples. - * @param frame_size int: The number of samples per channel of - * available space in \a pcm. - * If this is less than the maximum packet duration - * (120 ms; 5760 for 48kHz), this function will not be capable - * of decoding some packets. In the case of PLC (data==NULL) - * or FEC (decode_fec=1), then frame_size needs to be exactly - * the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the - * next incoming packet. For the PLC and FEC cases, frame_size - * must be a multiple of 2.5 ms. - * @param decode_fec int: Flag (0 or 1) to request that any in-band - * forward error correction data be decoded. - * If no such data is available, the frame is - * decoded as if it were lost. - * @returns Number of samples decoded on success or a negative error code - * (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode_float( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - float *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on a multistream Opus decoder. - * - * Generally the request and subsequent arguments are generated by a - * convenience macro. - * @param st OpusMSDecoder*: Multistream decoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls, - * @ref opus_decoderctls, or @ref opus_multistream_ctls. - * @see opus_genericctls - * @see opus_decoderctls - * @see opus_multistream_ctls - */ -OPUS_EXPORT int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); - -/** Frees an OpusMSDecoder allocated by - * opus_multistream_decoder_create(). - * @param st OpusMSDecoder: Multistream decoder state to be freed. - */ -OPUS_EXPORT void opus_multistream_decoder_destroy(OpusMSDecoder *st); - -/**@}*/ - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_MULTISTREAM_H */ diff --git a/thirdparty/opus/opus/opus_types.h b/thirdparty/opus/opus/opus_types.h deleted file mode 100644 index b28e03aea2..0000000000 --- a/thirdparty/opus/opus/opus_types.h +++ /dev/null @@ -1,159 +0,0 @@ -/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */ -/* Modified by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -/* opus_types.h based on ogg_types.h from libogg */ - -/** - @file opus_types.h - @brief Opus reference implementation types -*/ -#ifndef OPUS_TYPES_H -#define OPUS_TYPES_H - -/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */ -#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) -#include - - typedef int16_t opus_int16; - typedef uint16_t opus_uint16; - typedef int32_t opus_int32; - typedef uint32_t opus_uint32; -#elif defined(_WIN32) - -# if defined(__CYGWIN__) -# include <_G_config.h> - typedef _G_int32_t opus_int32; - typedef _G_uint32_t opus_uint32; - typedef _G_int16 opus_int16; - typedef _G_uint16 opus_uint16; -# elif defined(__MINGW32__) - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; -# elif defined(__MWERKS__) - typedef int opus_int32; - typedef unsigned int opus_uint32; - typedef short opus_int16; - typedef unsigned short opus_uint16; -# else - /* MSVC/Borland */ - typedef __int32 opus_int32; - typedef unsigned __int32 opus_uint32; - typedef __int16 opus_int16; - typedef unsigned __int16 opus_uint16; -# endif - -#elif defined(__MACOS__) - -# include - typedef SInt16 opus_int16; - typedef UInt16 opus_uint16; - typedef SInt32 opus_int32; - typedef UInt32 opus_uint32; - -#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */ - -# include - typedef int16_t opus_int16; - typedef u_int16_t opus_uint16; - typedef int32_t opus_int32; - typedef u_int32_t opus_uint32; - -#elif defined(__BEOS__) - - /* Be */ -# include - typedef int16 opus_int16; - typedef u_int16 opus_uint16; - typedef int32_t opus_int32; - typedef u_int32_t opus_uint32; - -#elif defined (__EMX__) - - /* OS/2 GCC */ - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#elif defined (DJGPP) - - /* DJGPP */ - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#elif defined(R5900) - - /* PS2 EE */ - typedef int opus_int32; - typedef unsigned opus_uint32; - typedef short opus_int16; - typedef unsigned short opus_uint16; - -#elif defined(__SYMBIAN32__) - - /* Symbian GCC */ - typedef signed short opus_int16; - typedef unsigned short opus_uint16; - typedef signed int opus_int32; - typedef unsigned int opus_uint32; - -#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) - - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef long opus_int32; - typedef unsigned long opus_uint32; - -#elif defined(CONFIG_TI_C6X) - - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#else - - /* Give up, take a reasonable guess */ - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#endif - -#define opus_int int /* used for counters etc; at least 16 bits */ -#define opus_int64 long long -#define opus_int8 signed char - -#define opus_uint unsigned int /* used for counters etc; at least 16 bits */ -#define opus_uint64 unsigned long long -#define opus_uint8 unsigned char - -#endif /* OPUS_TYPES_H */ diff --git a/thirdparty/opus/opus/opusfile.h b/thirdparty/opus/opus/opusfile.h deleted file mode 100644 index 4bf2fba926..0000000000 --- a/thirdparty/opus/opus/opusfile.h +++ /dev/null @@ -1,2157 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.h 17182 2010-04-29 03:48:32Z xiphmont $ - - ********************************************************************/ -#if !defined(_opusfile_h) -# define _opusfile_h (1) - -/**\mainpage - \section Introduction - - This is the documentation for the libopusfile C API. - - The libopusfile package provides a convenient high-level API for - decoding and basic manipulation of all Ogg Opus audio streams. - libopusfile is implemented as a layer on top of Xiph.Org's - reference - libogg - and - libopus - libraries. - - libopusfile provides several sets of built-in routines for - file/stream access, and may also use custom stream I/O routines provided by - the embedded environment. - There are built-in I/O routines provided for ANSI-compliant - stdio (FILE *), memory buffers, and URLs - (including URLs, plus optionally and URLs). - - \section Organization - - The main API is divided into several sections: - - \ref stream_open_close - - \ref stream_info - - \ref stream_decoding - - \ref stream_seeking - - Several additional sections are not tied to the main API. - - \ref stream_callbacks - - \ref header_info - - \ref error_codes - - \section Overview - - The libopusfile API always decodes files to 48 kHz. - The original sample rate is not preserved by the lossy compression, though - it is stored in the header to allow you to resample to it after decoding - (the libopusfile API does not currently provide a resampler, - but the - the - Speex resampler is a good choice if you need one). - In general, if you are playing back the audio, you should leave it at - 48 kHz, provided your audio hardware supports it. - When decoding to a file, it may be worth resampling back to the original - sample rate, so as not to surprise users who might not expect the sample - rate to change after encoding to Opus and decoding. - - Opus files can contain anywhere from 1 to 255 channels of audio. - The channel mappings for up to 8 channels are the same as the - Vorbis - mappings. - A special stereo API can convert everything to 2 channels, making it simple - to support multichannel files in an application which only has stereo - output. - Although the libopusfile ABI provides support for the theoretical - maximum number of channels, the current implementation does not support - files with more than 8 channels, as they do not have well-defined channel - mappings. - - Like all Ogg files, Opus files may be "chained". - That is, multiple Opus files may be combined into a single, longer file just - by concatenating the original files. - This is commonly done in internet radio streaming, as it allows the title - and artist to be updated each time the song changes, since each link in the - chain includes its own set of metadata. - - libopusfile fully supports chained files. - It will decode the first Opus stream found in each link of a chained file - (ignoring any other streams that might be concurrently multiplexed with it, - such as a video stream). - - The channel count can also change between links. - If your application is not prepared to deal with this, it can use the stereo - API to ensure the audio from all links will always get decoded into a - common format. - Since libopusfile always decodes to 48 kHz, you do not have to - worry about the sample rate changing between links (as was possible with - Vorbis). - This makes application support for chained files with libopusfile - very easy.*/ - -# if defined(__cplusplus) -extern "C" { -# endif - -# include -# include -# include -# include - -/**@cond PRIVATE*/ - -/*Enable special features for gcc and gcc-compatible compilers.*/ -# if !defined(OP_GNUC_PREREQ) -# if defined(__GNUC__)&&defined(__GNUC_MINOR__) -# define OP_GNUC_PREREQ(_maj,_min) \ - ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) -# else -# define OP_GNUC_PREREQ(_maj,_min) 0 -# endif -# endif - -# if OP_GNUC_PREREQ(4,0) -# pragma GCC visibility push(default) -# endif - -typedef struct OpusHead OpusHead; -typedef struct OpusTags OpusTags; -typedef struct OpusPictureTag OpusPictureTag; -typedef struct OpusServerInfo OpusServerInfo; -typedef struct OpusFileCallbacks OpusFileCallbacks; -typedef struct OggOpusFile OggOpusFile; - -/*Warning attributes for libopusfile functions.*/ -# if OP_GNUC_PREREQ(3,4) -# define OP_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# else -# define OP_WARN_UNUSED_RESULT -# endif -# if OP_GNUC_PREREQ(3,4) -# define OP_ARG_NONNULL(_x) __attribute__((__nonnull__(_x))) -# else -# define OP_ARG_NONNULL(_x) -# endif - -/**@endcond*/ - -/**\defgroup error_codes Error Codes*/ -/*@{*/ -/**\name List of possible error codes - Many of the functions in this library return a negative error code when a - function fails. - This list provides a brief explanation of the common errors. - See each individual function for more details on what a specific error code - means in that context.*/ -/*@{*/ - -/**A request did not succeed.*/ -#define OP_FALSE (-1) -/*Currently not used externally.*/ -#define OP_EOF (-2) -/**There was a hole in the page sequence numbers (e.g., a page was corrupt or - missing).*/ -#define OP_HOLE (-3) -/**An underlying read, seek, or tell operation failed when it should have - succeeded.*/ -#define OP_EREAD (-128) -/**A NULL pointer was passed where one was unexpected, or an - internal memory allocation failed, or an internal library error was - encountered.*/ -#define OP_EFAULT (-129) -/**The stream used a feature that is not implemented, such as an unsupported - channel family.*/ -#define OP_EIMPL (-130) -/**One or more parameters to a function were invalid.*/ -#define OP_EINVAL (-131) -/**A purported Ogg Opus stream did not begin with an Ogg page, a purported - header packet did not start with one of the required strings, "OpusHead" or - "OpusTags", or a link in a chained file was encountered that did not - contain any logical Opus streams.*/ -#define OP_ENOTFORMAT (-132) -/**A required header packet was not properly formatted, contained illegal - values, or was missing altogether.*/ -#define OP_EBADHEADER (-133) -/**The ID header contained an unrecognized version number.*/ -#define OP_EVERSION (-134) -/*Currently not used at all.*/ -#define OP_ENOTAUDIO (-135) -/**An audio packet failed to decode properly. - This is usually caused by a multistream Ogg packet where the durations of - the individual Opus packets contained in it are not all the same.*/ -#define OP_EBADPACKET (-136) -/**We failed to find data we had seen before, or the bitstream structure was - sufficiently malformed that seeking to the target destination was - impossible.*/ -#define OP_EBADLINK (-137) -/**An operation that requires seeking was requested on an unseekable stream.*/ -#define OP_ENOSEEK (-138) -/**The first or last granule position of a link failed basic validity checks.*/ -#define OP_EBADTIMESTAMP (-139) - -/*@}*/ -/*@}*/ - -/**\defgroup header_info Header Information*/ -/*@{*/ - -/**The maximum number of channels in an Ogg Opus stream.*/ -#define OPUS_CHANNEL_COUNT_MAX (255) - -/**Ogg Opus bitstream information. - This contains the basic playback parameters for a stream, and corresponds to - the initial ID header packet of an Ogg Opus stream.*/ -struct OpusHead{ - /**The Ogg Opus format version, in the range 0...255. - The top 4 bits represent a "major" version, and the bottom four bits - represent backwards-compatible "minor" revisions. - The current specification describes version 1. - This library will recognize versions up through 15 as backwards compatible - with the current specification. - An earlier draft of the specification described a version 0, but the only - difference between version 1 and version 0 is that version 0 did - not specify the semantics for handling the version field.*/ - int version; - /**The number of channels, in the range 1...255.*/ - int channel_count; - /**The number of samples that should be discarded from the beginning of the - stream.*/ - unsigned pre_skip; - /**The sampling rate of the original input. - All Opus audio is coded at 48 kHz, and should also be decoded at 48 kHz - for playback (unless the target hardware does not support this sampling - rate). - However, this field may be used to resample the audio back to the original - sampling rate, for example, when saving the output to a file.*/ - opus_uint32 input_sample_rate; - /**The gain to apply to the decoded output, in dB, as a Q8 value in the range - -32768...32767. - The libopusfile API will automatically apply this gain to the - decoded output before returning it, scaling it by - pow(10,output_gain/(20.0*256)).*/ - int output_gain; - /**The channel mapping family, in the range 0...255. - Channel mapping family 0 covers mono or stereo in a single stream. - Channel mapping family 1 covers 1 to 8 channels in one or more streams, - using the Vorbis speaker assignments. - Channel mapping family 255 covers 1 to 255 channels in one or more - streams, but without any defined speaker assignment.*/ - int mapping_family; - /**The number of Opus streams in each Ogg packet, in the range 1...255.*/ - int stream_count; - /**The number of coupled Opus streams in each Ogg packet, in the range - 0...127. - This must satisfy 0 <= coupled_count <= stream_count and - coupled_count + stream_count <= 255. - The coupled streams appear first, before all uncoupled streams, in an Ogg - Opus packet.*/ - int coupled_count; - /**The mapping from coded stream channels to output channels. - Let index=mapping[k] be the value for channel k. - If index<2*coupled_count, then it refers to the left channel - from stream (index/2) if even, and the right channel from - stream (index/2) if odd. - Otherwise, it refers to the output of the uncoupled stream - (index-coupled_count).*/ - unsigned char mapping[OPUS_CHANNEL_COUNT_MAX]; -}; - -/**The metadata from an Ogg Opus stream. - - This structure holds the in-stream metadata corresponding to the 'comment' - header packet of an Ogg Opus stream. - The comment header is meant to be used much like someone jotting a quick - note on the label of a CD. - It should be a short, to the point text note that can be more than a couple - words, but not more than a short paragraph. - - The metadata is stored as a series of (tag, value) pairs, in length-encoded - string vectors, using the same format as Vorbis (without the final "framing - bit"), Theora, and Speex, except for the packet header. - The first occurrence of the '=' character delimits the tag and value. - A particular tag may occur more than once, and order is significant. - The character set encoding for the strings is always UTF-8, but the tag - names are limited to ASCII, and treated as case-insensitive. - See the Vorbis - comment header specification for details. - - In filling in this structure, libopusfile will null-terminate the - #user_comments strings for safety. - However, the bitstream format itself treats them as 8-bit clean vectors, - possibly containing NUL characters, so the #comment_lengths array should be - treated as their authoritative length. - - This structure is binary and source-compatible with a - vorbis_comment, and pointers to it may be freely cast to - vorbis_comment pointers, and vice versa. - It is provided as a separate type to avoid introducing a compile-time - dependency on the libvorbis headers.*/ -struct OpusTags{ - /**The array of comment string vectors.*/ - char **user_comments; - /**An array of the corresponding length of each vector, in bytes.*/ - int *comment_lengths; - /**The total number of comment streams.*/ - int comments; - /**The null-terminated vendor string. - This identifies the software used to encode the stream.*/ - char *vendor; -}; - -/**\name Picture tag image formats*/ -/*@{*/ - -/**The MIME type was not recognized, or the image data did not match the - declared MIME type.*/ -#define OP_PIC_FORMAT_UNKNOWN (-1) -/**The MIME type indicates the image data is really a URL.*/ -#define OP_PIC_FORMAT_URL (0) -/**The image is a JPEG.*/ -#define OP_PIC_FORMAT_JPEG (1) -/**The image is a PNG.*/ -#define OP_PIC_FORMAT_PNG (2) -/**The image is a GIF.*/ -#define OP_PIC_FORMAT_GIF (3) - -/*@}*/ - -/**The contents of a METADATA_BLOCK_PICTURE tag.*/ -struct OpusPictureTag{ - /**The picture type according to the ID3v2 APIC frame: -
      -
    1. Other
    2. -
    3. 32x32 pixels 'file icon' (PNG only)
    4. -
    5. Other file icon
    6. -
    7. Cover (front)
    8. -
    9. Cover (back)
    10. -
    11. Leaflet page
    12. -
    13. Media (e.g. label side of CD)
    14. -
    15. Lead artist/lead performer/soloist
    16. -
    17. Artist/performer
    18. -
    19. Conductor
    20. -
    21. Band/Orchestra
    22. -
    23. Composer
    24. -
    25. Lyricist/text writer
    26. -
    27. Recording Location
    28. -
    29. During recording
    30. -
    31. During performance
    32. -
    33. Movie/video screen capture
    34. -
    35. A bright colored fish
    36. -
    37. Illustration
    38. -
    39. Band/artist logotype
    40. -
    41. Publisher/Studio logotype
    42. -
    - Others are reserved and should not be used. - There may only be one each of picture type 1 and 2 in a file.*/ - opus_int32 type; - /**The MIME type of the picture, in printable ASCII characters 0x20-0x7E. - The MIME type may also be "-->" to signify that the data part - is a URL pointing to the picture instead of the picture data itself. - In this case, a terminating NUL is appended to the URL string in #data, - but #data_length is set to the length of the string excluding that - terminating NUL.*/ - char *mime_type; - /**The description of the picture, in UTF-8.*/ - char *description; - /**The width of the picture in pixels.*/ - opus_uint32 width; - /**The height of the picture in pixels.*/ - opus_uint32 height; - /**The color depth of the picture in bits-per-pixel (not - bits-per-channel).*/ - opus_uint32 depth; - /**For indexed-color pictures (e.g., GIF), the number of colors used, or 0 - for non-indexed pictures.*/ - opus_uint32 colors; - /**The length of the picture data in bytes.*/ - opus_uint32 data_length; - /**The binary picture data.*/ - unsigned char *data; - /**The format of the picture data, if known. - One of -
      -
    • #OP_PIC_FORMAT_UNKNOWN,
    • -
    • #OP_PIC_FORMAT_URL,
    • -
    • #OP_PIC_FORMAT_JPEG,
    • -
    • #OP_PIC_FORMAT_PNG, or
    • -
    • #OP_PIC_FORMAT_GIF.
    • -
    */ - int format; -}; - -/**\name Functions for manipulating header data - - These functions manipulate the #OpusHead and #OpusTags structures, - which describe the audio parameters and tag-value metadata, respectively. - These can be used to query the headers returned by libopusfile, or - to parse Opus headers from sources other than an Ogg Opus stream, provided - they use the same format.*/ -/*@{*/ - -/**Parses the contents of the ID header packet of an Ogg Opus stream. - \param[out] _head Returns the contents of the parsed packet. - The contents of this structure are untouched on error. - This may be NULL to merely test the header - for validity. - \param[in] _data The contents of the ID header packet. - \param _len The number of bytes of data in the ID header packet. - \return 0 on success or a negative value on error. - \retval #OP_ENOTFORMAT If the data does not start with the "OpusHead" - string. - \retval #OP_EVERSION If the version field signaled a version this library - does not know how to parse. - \retval #OP_EIMPL If the channel mapping family was 255, which general - purpose players should not attempt to play. - \retval #OP_EBADHEADER If the contents of the packet otherwise violate the - Ogg Opus specification: -
      -
    • Insufficient data,
    • -
    • Too much data for the known minor versions,
    • -
    • An unrecognized channel mapping family,
    • -
    • Zero channels or too many channels,
    • -
    • Zero coded streams,
    • -
    • Too many coupled streams, or
    • -
    • An invalid channel mapping index.
    • -
    */ -OP_WARN_UNUSED_RESULT int opus_head_parse(OpusHead *_head, - const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2); - -/**Converts a granule position to a sample offset for a given Ogg Opus stream. - The sample offset is simply _gp-_head->pre_skip. - Granule position values smaller than OpusHead#pre_skip correspond to audio - that should never be played, and thus have no associated sample offset. - This function returns -1 for such values. - This function also correctly handles extremely large granule positions, - which may have wrapped around to a negative number when stored in a signed - ogg_int64_t value. - \param _head The #OpusHead information from the ID header of the stream. - \param _gp The granule position to convert. - \return The sample offset associated with the given granule position - (counting at a 48 kHz sampling rate), or the special value -1 on - error (i.e., the granule position was smaller than the pre-skip - amount).*/ -ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp) - OP_ARG_NONNULL(1); - -/**Parses the contents of the 'comment' header packet of an Ogg Opus stream. - \param[out] _tags An uninitialized #OpusTags structure. - This returns the contents of the parsed packet. - The contents of this structure are untouched on error. - This may be NULL to merely test the header - for validity. - \param[in] _data The contents of the 'comment' header packet. - \param _len The number of bytes of data in the 'info' header packet. - \retval 0 Success. - \retval #OP_ENOTFORMAT If the data does not start with the "OpusTags" - string. - \retval #OP_EBADHEADER If the contents of the packet otherwise violate the - Ogg Opus specification. - \retval #OP_EFAULT If there wasn't enough memory to store the tags.*/ -OP_WARN_UNUSED_RESULT int opus_tags_parse(OpusTags *_tags, - const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2); - -/**Performs a deep copy of an #OpusTags structure. - \param _dst The #OpusTags structure to copy into. - If this function fails, the contents of this structure remain - untouched. - \param _src The #OpusTags structure to copy from. - \retval 0 Success. - \retval #OP_EFAULT If there wasn't enough memory to copy the tags.*/ -int opus_tags_copy(OpusTags *_dst,const OpusTags *_src) OP_ARG_NONNULL(1); - -/**Initializes an #OpusTags structure. - This should be called on a freshly allocated #OpusTags structure before - attempting to use it. - \param _tags The #OpusTags structure to initialize.*/ -void opus_tags_init(OpusTags *_tags) OP_ARG_NONNULL(1); - -/**Add a (tag, value) pair to an initialized #OpusTags structure. - \note Neither opus_tags_add() nor opus_tags_add_comment() support values - containing embedded NULs, although the bitstream format does support them. - To add such tags, you will need to manipulate the #OpusTags structure - directly. - \param _tags The #OpusTags structure to add the (tag, value) pair to. - \param _tag A NUL-terminated, case-insensitive, ASCII string containing - the tag to add (without an '=' character). - \param _value A NUL-terminated UTF-8 containing the corresponding value. - \return 0 on success, or a negative value on failure. - \retval #OP_EFAULT An internal memory allocation failed.*/ -int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) OP_ARG_NONNULL(3); - -/**Add a comment to an initialized #OpusTags structure. - \note Neither opus_tags_add_comment() nor opus_tags_add() support comments - containing embedded NULs, although the bitstream format does support them. - To add such tags, you will need to manipulate the #OpusTags structure - directly. - \param _tags The #OpusTags structure to add the comment to. - \param _comment A NUL-terminated UTF-8 string containing the comment in - "TAG=value" form. - \return 0 on success, or a negative value on failure. - \retval #OP_EFAULT An internal memory allocation failed.*/ -int opus_tags_add_comment(OpusTags *_tags,const char *_comment) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Replace the binary suffix data at the end of the packet (if any). - \param _tags An initialized #OpusTags structure. - \param _data A buffer of binary data to append after the encoded user - comments. - The least significant bit of the first byte of this data must - be set (to ensure the data is preserved by other editors). - \param _len The number of bytes of binary data to append. - This may be zero to remove any existing binary suffix data. - \return 0 on success, or a negative value on error. - \retval #OP_EINVAL \a _len was negative, or \a _len was positive but - \a _data was NULL or the least significant - bit of the first byte was not set. - \retval #OP_EFAULT An internal memory allocation failed.*/ -int opus_tags_set_binary_suffix(OpusTags *_tags, - const unsigned char *_data,int _len) OP_ARG_NONNULL(1); - -/**Look up a comment value by its tag. - \param _tags An initialized #OpusTags structure. - \param _tag The tag to look up. - \param _count The instance of the tag. - The same tag can appear multiple times, each with a distinct - value, so an index is required to retrieve them all. - The order in which these values appear is significant and - should be preserved. - Use opus_tags_query_count() to get the legal range for the - \a _count parameter. - \return A pointer to the queried tag's value. - This points directly to data in the #OpusTags structure. - It should not be modified or freed by the application, and - modifications to the structure may invalidate the pointer. - \retval NULL If no matching tag is found.*/ -const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Look up the number of instances of a tag. - Call this first when querying for a specific tag and then iterate over the - number of instances with separate calls to opus_tags_query() to retrieve - all the values for that tag in order. - \param _tags An initialized #OpusTags structure. - \param _tag The tag to look up. - \return The number of instances of this particular tag.*/ -int opus_tags_query_count(const OpusTags *_tags,const char *_tag) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Retrieve the binary suffix data at the end of the packet (if any). - \param _tags An initialized #OpusTags structure. - \param[out] _len Returns the number of bytes of binary suffix data returned. - \return A pointer to the binary suffix data, or NULL if none - was present.*/ -const unsigned char *opus_tags_get_binary_suffix(const OpusTags *_tags, - int *_len) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Get the album gain from an R128_ALBUM_GAIN tag, if one was specified. - This searches for the first R128_ALBUM_GAIN tag with a valid signed, - 16-bit decimal integer value and returns the value. - This routine is exposed merely for convenience for applications which wish - to do something special with the album gain (i.e., display it). - If you simply wish to apply the album gain instead of the header gain, you - can use op_set_gain_offset() with an #OP_ALBUM_GAIN type and no offset. - \param _tags An initialized #OpusTags structure. - \param[out] _gain_q8 The album gain, in 1/256ths of a dB. - This will lie in the range [-32768,32767], and should - be applied in addition to the header gain. - On error, no value is returned, and the previous - contents remain unchanged. - \return 0 on success, or a negative value on error. - \retval #OP_FALSE There was no album gain available in the given tags.*/ -int opus_tags_get_album_gain(const OpusTags *_tags,int *_gain_q8) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Get the track gain from an R128_TRACK_GAIN tag, if one was specified. - This searches for the first R128_TRACK_GAIN tag with a valid signed, - 16-bit decimal integer value and returns the value. - This routine is exposed merely for convenience for applications which wish - to do something special with the track gain (i.e., display it). - If you simply wish to apply the track gain instead of the header gain, you - can use op_set_gain_offset() with an #OP_TRACK_GAIN type and no offset. - \param _tags An initialized #OpusTags structure. - \param[out] _gain_q8 The track gain, in 1/256ths of a dB. - This will lie in the range [-32768,32767], and should - be applied in addition to the header gain. - On error, no value is returned, and the previous - contents remain unchanged. - \return 0 on success, or a negative value on error. - \retval #OP_FALSE There was no track gain available in the given tags.*/ -int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Clears the #OpusTags structure. - This should be called on an #OpusTags structure after it is no longer - needed. - It will free all memory used by the structure members. - \param _tags The #OpusTags structure to clear.*/ -void opus_tags_clear(OpusTags *_tags) OP_ARG_NONNULL(1); - -/**Check if \a _comment is an instance of a \a _tag_name tag. - \see opus_tagncompare - \param _tag_name A NUL-terminated, case-insensitive, ASCII string containing - the name of the tag to check for (without the terminating - '=' character). - \param _comment The comment string to check. - \return An integer less than, equal to, or greater than zero if \a _comment - is found respectively, to be less than, to match, or be greater - than a "tag=value" string whose tag matches \a _tag_name.*/ -int opus_tagcompare(const char *_tag_name,const char *_comment); - -/**Check if \a _comment is an instance of a \a _tag_name tag. - This version is slightly more efficient than opus_tagcompare() if the length - of the tag name is already known (e.g., because it is a constant). - \see opus_tagcompare - \param _tag_name A case-insensitive ASCII string containing the name of the - tag to check for (without the terminating '=' character). - \param _tag_len The number of characters in the tag name. - This must be non-negative. - \param _comment The comment string to check. - \return An integer less than, equal to, or greater than zero if \a _comment - is found respectively, to be less than, to match, or be greater - than a "tag=value" string whose tag matches the first \a _tag_len - characters of \a _tag_name.*/ -int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment); - -/**Parse a single METADATA_BLOCK_PICTURE tag. - This decodes the BASE64-encoded content of the tag and returns a structure - with the MIME type, description, image parameters (if known), and the - compressed image data. - If the MIME type indicates the presence of an image format we recognize - (JPEG, PNG, or GIF) and the actual image data contains the magic signature - associated with that format, then the OpusPictureTag::format field will be - set to the corresponding format. - This is provided as a convenience to avoid requiring applications to parse - the MIME type and/or do their own format detection for the commonly used - formats. - In this case, we also attempt to extract the image parameters directly from - the image data (overriding any that were present in the tag, which the - specification says applications are not meant to rely on). - The application must still provide its own support for actually decoding the - image data and, if applicable, retrieving that data from URLs. - \param[out] _pic Returns the parsed picture data. - No sanitation is done on the type, MIME type, or - description fields, so these might return invalid values. - The contents of this structure are left unmodified on - failure. - \param _tag The METADATA_BLOCK_PICTURE tag contents. - The leading "METADATA_BLOCK_PICTURE=" portion is optional, - to allow the function to be used on either directly on the - values in OpusTags::user_comments or on the return value - of opus_tags_query(). - \return 0 on success or a negative value on error. - \retval #OP_ENOTFORMAT The METADATA_BLOCK_PICTURE contents were not valid. - \retval #OP_EFAULT There was not enough memory to store the picture tag - contents.*/ -OP_WARN_UNUSED_RESULT int opus_picture_tag_parse(OpusPictureTag *_pic, - const char *_tag) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Initializes an #OpusPictureTag structure. - This should be called on a freshly allocated #OpusPictureTag structure - before attempting to use it. - \param _pic The #OpusPictureTag structure to initialize.*/ -void opus_picture_tag_init(OpusPictureTag *_pic) OP_ARG_NONNULL(1); - -/**Clears the #OpusPictureTag structure. - This should be called on an #OpusPictureTag structure after it is no longer - needed. - It will free all memory used by the structure members. - \param _pic The #OpusPictureTag structure to clear.*/ -void opus_picture_tag_clear(OpusPictureTag *_pic) OP_ARG_NONNULL(1); - -/*@}*/ - -/*@}*/ - -/**\defgroup url_options URL Reading Options*/ -/*@{*/ -/**\name URL reading options - Options for op_url_stream_create() and associated functions. - These allow you to provide proxy configuration parameters, skip SSL - certificate checks, etc. - Options are processed in order, and if the same option is passed multiple - times, only the value specified by the last occurrence has an effect - (unless otherwise specified). - They may be expanded in the future.*/ -/*@{*/ - -/**@cond PRIVATE*/ - -/*These are the raw numbers used to define the request codes. - They should not be used directly.*/ -#define OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST (6464) -#define OP_HTTP_PROXY_HOST_REQUEST (6528) -#define OP_HTTP_PROXY_PORT_REQUEST (6592) -#define OP_HTTP_PROXY_USER_REQUEST (6656) -#define OP_HTTP_PROXY_PASS_REQUEST (6720) -#define OP_GET_SERVER_INFO_REQUEST (6784) - -#define OP_URL_OPT(_request) ((_request)+(char *)0) - -/*These macros trigger compilation errors or warnings if the wrong types are - provided to one of the URL options.*/ -#define OP_CHECK_INT(_x) ((void)((_x)==(opus_int32)0),(opus_int32)(_x)) -#define OP_CHECK_CONST_CHAR_PTR(_x) ((_x)+((_x)-(const char *)(_x))) -#define OP_CHECK_SERVER_INFO_PTR(_x) ((_x)+((_x)-(OpusServerInfo *)(_x))) - -/**@endcond*/ - -/**HTTP/Shoutcast/Icecast server information associated with a URL.*/ -struct OpusServerInfo{ - /**The name of the server (icy-name/ice-name). - This is NULL if there was no icy-name or - ice-name header.*/ - char *name; - /**A short description of the server (icy-description/ice-description). - This is NULL if there was no icy-description or - ice-description header.*/ - char *description; - /**The genre the server falls under (icy-genre/ice-genre). - This is NULL if there was no icy-genre or - ice-genre header.*/ - char *genre; - /**The homepage for the server (icy-url/ice-url). - This is NULL if there was no icy-url or - ice-url header.*/ - char *url; - /**The software used by the origin server (Server). - This is NULL if there was no Server header.*/ - char *server; - /**The media type of the entity sent to the recepient (Content-Type). - This is NULL if there was no Content-Type - header.*/ - char *content_type; - /**The nominal stream bitrate in kbps (icy-br/ice-bitrate). - This is -1 if there was no icy-br or - ice-bitrate header.*/ - opus_int32 bitrate_kbps; - /**Flag indicating whether the server is public (1) or not - (0) (icy-pub/ice-public). - This is -1 if there was no icy-pub or - ice-public header.*/ - int is_public; - /**Flag indicating whether the server is using HTTPS instead of HTTP. - This is 0 unless HTTPS is being used. - This may not match the protocol used in the original URL if there were - redirections.*/ - int is_ssl; -}; - -/**Initializes an #OpusServerInfo structure. - All fields are set as if the corresponding header was not available. - \param _info The #OpusServerInfo structure to initialize. - \note If you use this function, you must link against libopusurl.*/ -void opus_server_info_init(OpusServerInfo *_info) OP_ARG_NONNULL(1); - -/**Clears the #OpusServerInfo structure. - This should be called on an #OpusServerInfo structure after it is no longer - needed. - It will free all memory used by the structure members. - \param _info The #OpusServerInfo structure to clear. - \note If you use this function, you must link against libopusurl.*/ -void opus_server_info_clear(OpusServerInfo *_info) OP_ARG_NONNULL(1); - -/**Skip the certificate check when connecting via TLS/SSL (https). - \param _b opus_int32: Whether or not to skip the certificate - check. - The check will be skipped if \a _b is non-zero, and will not be - skipped if \a _b is zero. - \hideinitializer*/ -#define OP_SSL_SKIP_CERTIFICATE_CHECK(_b) \ - OP_URL_OPT(OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST),OP_CHECK_INT(_b) - -/**Proxy connections through the given host. - If no port is specified via #OP_HTTP_PROXY_PORT, the port number defaults - to 8080 (http-alt). - All proxy parameters are ignored for non-http and non-https URLs. - \param _host const char *: The proxy server hostname. - This may be NULL to disable the use of a proxy - server. - \hideinitializer*/ -#define OP_HTTP_PROXY_HOST(_host) \ - OP_URL_OPT(OP_HTTP_PROXY_HOST_REQUEST),OP_CHECK_CONST_CHAR_PTR(_host) - -/**Use the given port when proxying connections. - This option only has an effect if #OP_HTTP_PROXY_HOST is specified with a - non-NULL \a _host. - If this option is not provided, the proxy port number defaults to 8080 - (http-alt). - All proxy parameters are ignored for non-http and non-https URLs. - \param _port opus_int32: The proxy server port. - This must be in the range 0...65535 (inclusive), or the - URL function this is passed to will fail. - \hideinitializer*/ -#define OP_HTTP_PROXY_PORT(_port) \ - OP_URL_OPT(OP_HTTP_PROXY_PORT_REQUEST),OP_CHECK_INT(_port) - -/**Use the given user name for authentication when proxying connections. - All proxy parameters are ignored for non-http and non-https URLs. - \param _user const char *: The proxy server user name. - This may be NULL to disable proxy - authentication. - A non-NULL value only has an effect - if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_PASS - are also specified with non-NULL - arguments. - \hideinitializer*/ -#define OP_HTTP_PROXY_USER(_user) \ - OP_URL_OPT(OP_HTTP_PROXY_USER_REQUEST),OP_CHECK_CONST_CHAR_PTR(_user) - -/**Use the given password for authentication when proxying connections. - All proxy parameters are ignored for non-http and non-https URLs. - \param _pass const char *: The proxy server password. - This may be NULL to disable proxy - authentication. - A non-NULL value only has an effect - if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_USER - are also specified with non-NULL - arguments. - \hideinitializer*/ -#define OP_HTTP_PROXY_PASS(_pass) \ - OP_URL_OPT(OP_HTTP_PROXY_PASS_REQUEST),OP_CHECK_CONST_CHAR_PTR(_pass) - -/**Parse information about the streaming server (if any) and return it. - Very little validation is done. - In particular, OpusServerInfo::url may not be a valid URL, - OpusServerInfo::bitrate_kbps may not really be in kbps, and - OpusServerInfo::content_type may not be a valid MIME type. - The character set of the string fields is not specified anywhere, and should - not be assumed to be valid UTF-8. - \param _info OpusServerInfo *: Returns information about the server. - If there is any error opening the stream, the - contents of this structure remain - unmodified. - On success, fills in the structure with the - server information that was available, if - any. - After a successful return, the contents of - this structure should be freed by calling - opus_server_info_clear(). - \hideinitializer*/ -#define OP_GET_SERVER_INFO(_info) \ - OP_URL_OPT(OP_GET_SERVER_INFO_REQUEST),OP_CHECK_SERVER_INFO_PTR(_info) - -/*@}*/ -/*@}*/ - -/**\defgroup stream_callbacks Abstract Stream Reading Interface*/ -/*@{*/ -/**\name Functions for reading from streams - These functions define the interface used to read from and seek in a stream - of data. - A stream does not need to implement seeking, but the decoder will not be - able to seek if it does not do so. - These functions also include some convenience routines for working with - standard FILE pointers, complete streams stored in a single - block of memory, or URLs.*/ -/*@{*/ - -/**Reads up to \a _nbytes bytes of data from \a _stream. - \param _stream The stream to read from. - \param[out] _ptr The buffer to store the data in. - \param _nbytes The maximum number of bytes to read. - This function may return fewer, though it will not - return zero unless it reaches end-of-file. - \return The number of bytes successfully read, or a negative value on - error.*/ -typedef int (*op_read_func)(void *_stream,unsigned char *_ptr,int _nbytes); - -/**Sets the position indicator for \a _stream. - The new position, measured in bytes, is obtained by adding \a _offset - bytes to the position specified by \a _whence. - If \a _whence is set to SEEK_SET, SEEK_CUR, or - SEEK_END, the offset is relative to the start of the stream, - the current position indicator, or end-of-file, respectively. - \retval 0 Success. - \retval -1 Seeking is not supported or an error occurred. - errno need not be set.*/ -typedef int (*op_seek_func)(void *_stream,opus_int64 _offset,int _whence); - -/**Obtains the current value of the position indicator for \a _stream. - \return The current position indicator.*/ -typedef opus_int64 (*op_tell_func)(void *_stream); - -/**Closes the underlying stream. - \retval 0 Success. - \retval EOF An error occurred. - errno need not be set.*/ -typedef int (*op_close_func)(void *_stream); - -/**The callbacks used to access non-FILE stream resources. - The function prototypes are basically the same as for the stdio functions - fread(), fseek(), ftell(), and - fclose(). - The differences are that the FILE * arguments have been - replaced with a void *, which is to be used as a pointer to - whatever internal data these functions might need, that #seek and #tell - take and return 64-bit offsets, and that #seek must return -1 if - the stream is unseekable.*/ -struct OpusFileCallbacks{ - /**Used to read data from the stream. - This must not be NULL.*/ - op_read_func read; - /**Used to seek in the stream. - This may be NULL if seeking is not implemented.*/ - op_seek_func seek; - /**Used to return the current read position in the stream. - This may be NULL if seeking is not implemented.*/ - op_tell_func tell; - /**Used to close the stream when the decoder is freed. - This may be NULL to leave the stream open.*/ - op_close_func close; -}; - -/**Opens a stream with fopen() and fills in a set of callbacks - that can be used to access it. - This is useful to avoid writing your own portable 64-bit seeking wrappers, - and also avoids cross-module linking issues on Windows, where a - FILE * must be accessed by routines defined in the same module - that opened it. - \param[out] _cb The callbacks to use for this file. - If there is an error opening the file, nothing will be - filled in here. - \param _path The path to the file to open. - On Windows, this string must be UTF-8 (to allow access to - files whose names cannot be represented in the current - MBCS code page). - All other systems use the native character encoding. - \param _mode The mode to open the file in. - \return A stream handle to use with the callbacks, or NULL on - error.*/ -OP_WARN_UNUSED_RESULT void *op_fopen(OpusFileCallbacks *_cb, - const char *_path,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) - OP_ARG_NONNULL(3); - -/**Opens a stream with fdopen() and fills in a set of callbacks - that can be used to access it. - This is useful to avoid writing your own portable 64-bit seeking wrappers, - and also avoids cross-module linking issues on Windows, where a - FILE * must be accessed by routines defined in the same module - that opened it. - \param[out] _cb The callbacks to use for this file. - If there is an error opening the file, nothing will be - filled in here. - \param _fd The file descriptor to open. - \param _mode The mode to open the file in. - \return A stream handle to use with the callbacks, or NULL on - error.*/ -OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb, - int _fd,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(3); - -/**Opens a stream with freopen() and fills in a set of callbacks - that can be used to access it. - This is useful to avoid writing your own portable 64-bit seeking wrappers, - and also avoids cross-module linking issues on Windows, where a - FILE * must be accessed by routines defined in the same module - that opened it. - \param[out] _cb The callbacks to use for this file. - If there is an error opening the file, nothing will be - filled in here. - \param _path The path to the file to open. - On Windows, this string must be UTF-8 (to allow access - to files whose names cannot be represented in the - current MBCS code page). - All other systems use the native character encoding. - \param _mode The mode to open the file in. - \param _stream A stream previously returned by op_fopen(), op_fdopen(), - or op_freopen(). - \return A stream handle to use with the callbacks, or NULL on - error.*/ -OP_WARN_UNUSED_RESULT void *op_freopen(OpusFileCallbacks *_cb, - const char *_path,const char *_mode,void *_stream) OP_ARG_NONNULL(1) - OP_ARG_NONNULL(2) OP_ARG_NONNULL(3) OP_ARG_NONNULL(4); - -/**Creates a stream that reads from the given block of memory. - This block of memory must contain the complete stream to decode. - This is useful for caching small streams (e.g., sound effects) in RAM. - \param[out] _cb The callbacks to use for this stream. - If there is an error creating the stream, nothing will be - filled in here. - \param _data The block of memory to read from. - \param _size The size of the block of memory. - \return A stream handle to use with the callbacks, or NULL on - error.*/ -OP_WARN_UNUSED_RESULT void *op_mem_stream_create(OpusFileCallbacks *_cb, - const unsigned char *_data,size_t _size) OP_ARG_NONNULL(1); - -/**Creates a stream that reads from the given URL. - This function behaves identically to op_url_stream_create(), except that it - takes a va_list instead of a variable number of arguments. - It does not call the va_end macro, and because it invokes the - va_arg macro, the value of \a _ap is undefined after the call. - \note If you use this function, you must link against libopusurl. - \param[out] _cb The callbacks to use for this stream. - If there is an error creating the stream, nothing will - be filled in here. - \param _url The URL to read from. - Currently only the , , and - schemes are supported. - Both and may be disabled at compile - time, in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, - before passing them to this function. - \param[in,out] _ap A list of the \ref url_options "optional flags" to use. - This is a variable-length list of options terminated - with NULL. - \return A stream handle to use with the callbacks, or NULL on - error.*/ -OP_WARN_UNUSED_RESULT void *op_url_stream_vcreate(OpusFileCallbacks *_cb, - const char *_url,va_list _ap) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Creates a stream that reads from the given URL. - \note If you use this function, you must link against libopusurl. - \param[out] _cb The callbacks to use for this stream. - If there is an error creating the stream, nothing will be - filled in here. - \param _url The URL to read from. - Currently only the , , and schemes - are supported. - Both and may be disabled at compile time, - in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, before - passing them to this function. - \param ... The \ref url_options "optional flags" to use. - This is a variable-length list of options terminated with - NULL. - \return A stream handle to use with the callbacks, or NULL on - error.*/ -OP_WARN_UNUSED_RESULT void *op_url_stream_create(OpusFileCallbacks *_cb, - const char *_url,...) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_open_close Opening and Closing*/ -/*@{*/ -/**\name Functions for opening and closing streams - - These functions allow you to test a stream to see if it is Opus, open it, - and close it. - Several flavors are provided for each of the built-in stream types, plus a - more general version which takes a set of application-provided callbacks.*/ -/*@{*/ - -/**Test to see if this is an Opus stream. - For good results, you will need at least 57 bytes (for a pure Opus-only - stream). - Something like 512 bytes will give more reliable results for multiplexed - streams. - This function is meant to be a quick-rejection filter. - Its purpose is not to guarantee that a stream is a valid Opus stream, but to - ensure that it looks enough like Opus that it isn't going to be recognized - as some other format (except possibly an Opus stream that is also - multiplexed with other codecs, such as video). - \param[out] _head The parsed ID header contents. - You may pass NULL if you do not need - this information. - If the function fails, the contents of this structure - remain untouched. - \param _initial_data An initial buffer of data from the start of the - stream. - \param _initial_bytes The number of bytes in \a _initial_data. - \return 0 if the data appears to be Opus, or a negative value on error. - \retval #OP_FALSE There was not enough data to tell if this was an Opus - stream or not. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL The stream used a feature that is not implemented, - such as an unsupported channel family. - \retval #OP_ENOTFORMAT If the data did not contain a recognizable ID - header for an Opus stream. - \retval #OP_EVERSION If the version field signaled a version this library - does not know how to parse. - \retval #OP_EBADHEADER The ID header was not properly formatted or contained - illegal values.*/ -int op_test(OpusHead *_head, - const unsigned char *_initial_data,size_t _initial_bytes); - -/**Open a stream from the given file path. - \param _path The path to the file to open. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want the - failure code. - The failure code will be #OP_EFAULT if the file could not - be opened, or one of the other failure codes from - op_open_callbacks() otherwise. - \return A freshly opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_file(const char *_path,int *_error) - OP_ARG_NONNULL(1); - -/**Open a stream from a memory buffer. - \param _data The memory buffer to open. - \param _size The number of bytes in the buffer. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want the - failure code. - See op_open_callbacks() for a full list of failure codes. - \return A freshly opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_memory(const unsigned char *_data, - size_t _size,int *_error); - -/**Open a stream from a URL. - This function behaves identically to op_open_url(), except that it - takes a va_list instead of a variable number of arguments. - It does not call the va_end macro, and because it invokes the - va_arg macro, the value of \a _ap is undefined after the call. - \note If you use this function, you must link against libopusurl. - \param _url The URL to open. - Currently only the , , and - schemes are supported. - Both and may be disabled at compile - time, in which case opening such URLs will always - fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, - with internationalized domain names encoded in - punycode, before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want - the failure code. - See op_open_callbacks() for a full list of failure - codes. - \param[in,out] _ap A list of the \ref url_options "optional flags" to - use. - This is a variable-length list of options terminated - with NULL. - \return A freshly opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_vopen_url(const char *_url, - int *_error,va_list _ap) OP_ARG_NONNULL(1); - -/**Open a stream from a URL. - \note If you use this function, you must link against libopusurl. - \param _url The URL to open. - Currently only the , , and schemes - are supported. - Both and may be disabled at compile - time, in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, - before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want the - failure code. - See op_open_callbacks() for a full list of failure codes. - \param ... The \ref url_options "optional flags" to use. - This is a variable-length list of options terminated with - NULL. - \return A freshly opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url, - int *_error,...) OP_ARG_NONNULL(1); - -/**Open a stream using the given set of callbacks to access it. - \param _source The stream to read from (e.g., a FILE *). - \param _cb The callbacks with which to access the stream. - read() must - be implemented. - seek() and - tell() may - be NULL, or may always return -1 to - indicate a source is unseekable, but if - seek() is - implemented and succeeds on a particular source, then - tell() must - also. - close() may - be NULL, but if it is not, it will be - called when the \c OggOpusFile is destroyed by - op_free(). - It will not be called if op_open_callbacks() fails - with an error. - \param _initial_data An initial buffer of data from the start of the - stream. - Applications can read some number of bytes from the - start of the stream to help identify this as an Opus - stream, and then provide them here to allow the - stream to be opened, even if it is unseekable. - \param _initial_bytes The number of bytes in \a _initial_data. - If the stream is seekable, its current position (as - reported by - tell() - at the start of this function) must be equal to - \a _initial_bytes. - Otherwise, seeking to absolute positions will - generate inconsistent results. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want - the failure code. - The failure code will be one of -
    -
    #OP_EREAD
    -
    An underlying read, seek, or tell operation - failed when it should have succeeded, or we failed - to find data in the stream we had seen before.
    -
    #OP_EFAULT
    -
    There was a memory allocation failure, or an - internal library error.
    -
    #OP_EIMPL
    -
    The stream used a feature that is not - implemented, such as an unsupported channel - family.
    -
    #OP_EINVAL
    -
    seek() - was implemented and succeeded on this source, but - tell() - did not, or the starting position indicator was - not equal to \a _initial_bytes.
    -
    #OP_ENOTFORMAT
    -
    The stream contained a link that did not have - any logical Opus streams in it.
    -
    #OP_EBADHEADER
    -
    A required header packet was not properly - formatted, contained illegal values, or was missing - altogether.
    -
    #OP_EVERSION
    -
    An ID header contained an unrecognized version - number.
    -
    #OP_EBADLINK
    -
    We failed to find data we had seen before after - seeking.
    -
    #OP_EBADTIMESTAMP
    -
    The first or last timestamp in a link failed - basic validity checks.
    -
    - \return A freshly opened \c OggOpusFile, or NULL on error. - libopusfile does not take ownership of the source - if the call fails. - The calling application is responsible for closing the source if - this call returns an error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source, - const OpusFileCallbacks *_cb,const unsigned char *_initial_data, - size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); - -/**Partially open a stream from the given file path. - \see op_test_callbacks - \param _path The path to the file to open. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want the - failure code. - The failure code will be #OP_EFAULT if the file could not - be opened, or one of the other failure codes from - op_open_callbacks() otherwise. - \return A partially opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_file(const char *_path,int *_error) - OP_ARG_NONNULL(1); - -/**Partially open a stream from a memory buffer. - \see op_test_callbacks - \param _data The memory buffer to open. - \param _size The number of bytes in the buffer. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want the - failure code. - See op_open_callbacks() for a full list of failure codes. - \return A partially opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_memory(const unsigned char *_data, - size_t _size,int *_error); - -/**Partially open a stream from a URL. - This function behaves identically to op_test_url(), except that it - takes a va_list instead of a variable number of arguments. - It does not call the va_end macro, and because it invokes the - va_arg macro, the value of \a _ap is undefined after the call. - \note If you use this function, you must link against libopusurl. - \see op_test_url - \see op_test_callbacks - \param _url The URL to open. - Currently only the , , and - schemes are supported. - Both and may be disabled at compile - time, in which case opening such URLs will always - fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, - with internationalized domain names encoded in - punycode, before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want - the failure code. - See op_open_callbacks() for a full list of failure - codes. - \param[in,out] _ap A list of the \ref url_options "optional flags" to - use. - This is a variable-length list of options terminated - with NULL. - \return A partially opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_vtest_url(const char *_url, - int *_error,va_list _ap) OP_ARG_NONNULL(1); - -/**Partially open a stream from a URL. - \note If you use this function, you must link against libopusurl. - \see op_test_callbacks - \param _url The URL to open. - Currently only the , , and - schemes are supported. - Both and may be disabled at compile - time, in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, - before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want the - failure code. - See op_open_callbacks() for a full list of failure - codes. - \param ... The \ref url_options "optional flags" to use. - This is a variable-length list of options terminated - with NULL. - \return A partially opened \c OggOpusFile, or NULL on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url, - int *_error,...) OP_ARG_NONNULL(1); - -/**Partially open a stream using the given set of callbacks to access it. - This tests for Opusness and loads the headers for the first link. - It does not seek (although it tests for seekability). - You can query a partially open stream for the few pieces of basic - information returned by op_serialno(), op_channel_count(), op_head(), and - op_tags() (but only for the first link). - You may also determine if it is seekable via a call to op_seekable(). - You cannot read audio from the stream, seek, get the size or duration, - get information from links other than the first one, or even get the total - number of links until you finish opening the stream with op_test_open(). - If you do not need to do any of these things, you can dispose of it with - op_free() instead. - - This function is provided mostly to simplify porting existing code that used - libvorbisfile. - For new code, you are likely better off using op_test() instead, which - is less resource-intensive, requires less data to succeed, and imposes a - hard limit on the amount of data it examines (important for unseekable - sources, where all such data must be buffered until you are sure of the - stream type). - \param _source The stream to read from (e.g., a FILE *). - \param _cb The callbacks with which to access the stream. - read() must - be implemented. - seek() and - tell() may - be NULL, or may always return -1 to - indicate a source is unseekable, but if - seek() is - implemented and succeeds on a particular source, then - tell() must - also. - close() may - be NULL, but if it is not, it will be - called when the \c OggOpusFile is destroyed by - op_free(). - It will not be called if op_open_callbacks() fails - with an error. - \param _initial_data An initial buffer of data from the start of the - stream. - Applications can read some number of bytes from the - start of the stream to help identify this as an Opus - stream, and then provide them here to allow the - stream to be tested more thoroughly, even if it is - unseekable. - \param _initial_bytes The number of bytes in \a _initial_data. - If the stream is seekable, its current position (as - reported by - tell() - at the start of this function) must be equal to - \a _initial_bytes. - Otherwise, seeking to absolute positions will - generate inconsistent results. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in NULL if you don't want - the failure code. - See op_open_callbacks() for a full list of failure - codes. - \return A partially opened \c OggOpusFile, or NULL on error. - libopusfile does not take ownership of the source - if the call fails. - The calling application is responsible for closing the source if - this call returns an error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source, - const OpusFileCallbacks *_cb,const unsigned char *_initial_data, - size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); - -/**Finish opening a stream partially opened with op_test_callbacks() or one of - the associated convenience functions. - If this function fails, you are still responsible for freeing the - \c OggOpusFile with op_free(). - \param _of The \c OggOpusFile to finish opening. - \return 0 on success, or a negative value on error. - \retval #OP_EREAD An underlying read, seek, or tell operation failed - when it should have succeeded. - \retval #OP_EFAULT There was a memory allocation failure, or an - internal library error. - \retval #OP_EIMPL The stream used a feature that is not implemented, - such as an unsupported channel family. - \retval #OP_EINVAL The stream was not partially opened with - op_test_callbacks() or one of the associated - convenience functions. - \retval #OP_ENOTFORMAT The stream contained a link that did not have any - logical Opus streams in it. - \retval #OP_EBADHEADER A required header packet was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An ID header contained an unrecognized version - number. - \retval #OP_EBADLINK We failed to find data we had seen before after - seeking. - \retval #OP_EBADTIMESTAMP The first or last timestamp in a link failed basic - validity checks.*/ -int op_test_open(OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Release all memory used by an \c OggOpusFile. - \param _of The \c OggOpusFile to free.*/ -void op_free(OggOpusFile *_of); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_info Stream Information*/ -/*@{*/ -/**\name Functions for obtaining information about streams - - These functions allow you to get basic information about a stream, including - seekability, the number of links (for chained streams), plus the size, - duration, bitrate, header parameters, and meta information for each link - (or, where available, the stream as a whole). - Some of these (size, duration) are only available for seekable streams. - You can also query the current stream position, link, and playback time, - and instantaneous bitrate during playback. - - Some of these functions may be used successfully on the partially open - streams returned by op_test_callbacks() or one of the associated - convenience functions. - Their documention will indicate so explicitly.*/ -/*@{*/ - -/**Returns whether or not the data source being read is seekable. - This is true if -
      -
    1. The seek() and - tell() callbacks are both - non-NULL,
    2. -
    3. The seek() callback was - successfully executed at least once, and
    4. -
    5. The tell() callback was - successfully able to report the position indicator afterwards.
    6. -
    - This function may be called on partially-opened streams. - \param _of The \c OggOpusFile whose seekable status is to be returned. - \return A non-zero value if seekable, and 0 if unseekable.*/ -int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Returns the number of links in this chained stream. - This function may be called on partially-opened streams, but it will always - return 1. - The actual number of links is not known until the stream is fully opened. - \param _of The \c OggOpusFile from which to retrieve the link count. - \return For fully-open seekable sources, this returns the total number of - links in the whole stream, which will be at least 1. - For partially-open or unseekable sources, this always returns 1.*/ -int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Get the serial number of the given link in a (possibly-chained) Ogg Opus - stream. - This function may be called on partially-opened streams, but it will always - return the serial number of the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the serial number. - \param _li The index of the link whose serial number should be retrieved. - Use a negative number to get the serial number of the current - link. - \return The serial number of the given link. - If \a _li is greater than the total number of links, this returns - the serial number of the last link. - If the source is not seekable, this always returns the serial number - of the current link.*/ -opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the channel count of the given link in a (possibly-chained) Ogg Opus - stream. - This is equivalent to op_head(_of,_li)->channel_count, but - is provided for convenience. - This function may be called on partially-opened streams, but it will always - return the channel count of the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the channel count. - \param _li The index of the link whose channel count should be retrieved. - Use a negative number to get the channel count of the current - link. - \return The channel count of the given link. - If \a _li is greater than the total number of links, this returns - the channel count of the last link. - If the source is not seekable, this always returns the channel count - of the current link.*/ -int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the total (compressed) size of the stream, or of an individual link in - a (possibly-chained) Ogg Opus stream, including all headers and Ogg muxing - overhead. - \warning If the Opus stream (or link) is concurrently multiplexed with other - logical streams (e.g., video), this returns the size of the entire stream - (or link), not just the number of bytes in the first logical Opus stream. - Returning the latter would require scanning the entire file. - \param _of The \c OggOpusFile from which to retrieve the compressed size. - \param _li The index of the link whose compressed size should be computed. - Use a negative number to get the compressed size of the entire - stream. - \return The compressed size of the entire stream if \a _li is negative, the - compressed size of link \a _li if it is non-negative, or a negative - value on error. - The compressed size of the entire stream may be smaller than that - of the underlying source if trailing garbage was detected in the - file. - \retval #OP_EINVAL The source is not seekable (so we can't know the length), - \a _li wasn't less than the total number of links in - the stream, or the stream was only partially open.*/ -opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the total PCM length (number of samples at 48 kHz) of the stream, or of - an individual link in a (possibly-chained) Ogg Opus stream. - Users looking for op_time_total() should use op_pcm_total() - instead. - Because timestamps in Opus are fixed at 48 kHz, there is no need for a - separate function to convert this to seconds (and leaving it out avoids - introducing floating point to the API, for those that wish to avoid it). - \param _of The \c OggOpusFile from which to retrieve the PCM offset. - \param _li The index of the link whose PCM length should be computed. - Use a negative number to get the PCM length of the entire stream. - \return The PCM length of the entire stream if \a _li is negative, the PCM - length of link \a _li if it is non-negative, or a negative value on - error. - \retval #OP_EINVAL The source is not seekable (so we can't know the length), - \a _li wasn't less than the total number of links in - the stream, or the stream was only partially open.*/ -ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the ID header information for the given link in a (possibly chained) Ogg - Opus stream. - This function may be called on partially-opened streams, but it will always - return the ID header information of the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the ID header - information. - \param _li The index of the link whose ID header information should be - retrieved. - Use a negative number to get the ID header information of the - current link. - For an unseekable stream, \a _li is ignored, and the ID header - information for the current link is always returned, if - available. - \return The contents of the ID header for the given link.*/ -const OpusHead *op_head(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the comment header information for the given link in a (possibly - chained) Ogg Opus stream. - This function may be called on partially-opened streams, but it will always - return the tags from the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the comment header - information. - \param _li The index of the link whose comment header information should be - retrieved. - Use a negative number to get the comment header information of - the current link. - For an unseekable stream, \a _li is ignored, and the comment - header information for the current link is always returned, if - available. - \return The contents of the comment header for the given link, or - NULL if this is an unseekable stream that encountered - an invalid link.*/ -const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Retrieve the index of the current link. - This is the link that produced the data most recently read by - op_read_float() or its associated functions, or, after a seek, the link - that the seek target landed in. - Reading more data may advance the link index (even on the first read after a - seek). - \param _of The \c OggOpusFile from which to retrieve the current link index. - \return The index of the current link on success, or a negative value on - failure. - For seekable streams, this is a number between 0 and the value - returned by op_link_count(). - For unseekable streams, this value starts at 0 and increments by one - each time a new link is encountered (even though op_link_count() - always returns 1). - \retval #OP_EINVAL The stream was only partially open.*/ -int op_current_link(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Computes the bitrate of the stream, or of an individual link in a - (possibly-chained) Ogg Opus stream. - The stream must be seekable to compute the bitrate. - For unseekable streams, use op_bitrate_instant() to get periodic estimates. - \warning If the Opus stream (or link) is concurrently multiplexed with other - logical streams (e.g., video), this uses the size of the entire stream (or - link) to compute the bitrate, not just the number of bytes in the first - logical Opus stream. - Returning the latter requires scanning the entire file, but this may be done - by decoding the whole file and calling op_bitrate_instant() once at the - end. - Install a trivial decoding callback with op_set_decode_callback() if you - wish to skip actual decoding during this process. - \param _of The \c OggOpusFile from which to retrieve the bitrate. - \param _li The index of the link whose bitrate should be computed. - Use a negative number to get the bitrate of the whole stream. - \return The bitrate on success, or a negative value on error. - \retval #OP_EINVAL The stream was only partially open, the stream was not - seekable, or \a _li was larger than the number of - links.*/ -opus_int32 op_bitrate(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Compute the instantaneous bitrate, measured as the ratio of bits to playable - samples decoded since a) the last call to op_bitrate_instant(), b) the last - seek, or c) the start of playback, whichever was most recent. - This will spike somewhat after a seek or at the start/end of a chain - boundary, as pre-skip, pre-roll, and end-trimming causes samples to be - decoded but not played. - \param _of The \c OggOpusFile from which to retrieve the bitrate. - \return The bitrate, in bits per second, or a negative value on error. - \retval #OP_FALSE No data has been decoded since any of the events - described above. - \retval #OP_EINVAL The stream was only partially open.*/ -opus_int32 op_bitrate_instant(OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Obtain the current value of the position indicator for \a _of. - \param _of The \c OggOpusFile from which to retrieve the position indicator. - \return The byte position that is currently being read from. - \retval #OP_EINVAL The stream was only partially open.*/ -opus_int64 op_raw_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Obtain the PCM offset of the next sample to be read. - If the stream is not properly timestamped, this might not increment by the - proper amount between reads, or even return monotonically increasing - values. - \param _of The \c OggOpusFile from which to retrieve the PCM offset. - \return The PCM offset of the next sample to be read. - \retval #OP_EINVAL The stream was only partially open.*/ -ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_seeking Seeking*/ -/*@{*/ -/**\name Functions for seeking in Opus streams - - These functions let you seek in Opus streams, if the underlying source - support it. - Seeking is implemented for all built-in stream I/O routines, though some - individual sources may not be seekable (pipes, live HTTP streams, or HTTP - streams from a server that does not support Range requests). - - op_raw_seek() is the fastest: it is guaranteed to perform at most one - physical seek, but, since the target is a byte position, makes no guarantee - how close to a given time it will come. - op_pcm_seek() provides sample-accurate seeking. - The number of physical seeks it requires is still quite small (often 1 or - 2, even in highly variable bitrate streams). - - Seeking in Opus requires decoding some pre-roll amount before playback to - allow the internal state to converge (as if recovering from packet loss). - This is handled internally by libopusfile, but means there is - little extra overhead for decoding up to the exact position requested - (since it must decode some amount of audio anyway). - It also means that decoding after seeking may not return exactly the same - values as would be obtained by decoding the stream straight through. - However, such differences are expected to be smaller than the loss - introduced by Opus's lossy compression.*/ -/*@{*/ - -/**Seek to a byte offset relative to the compressed data. - This also scans packets to update the PCM cursor. - It will cross a logical bitstream boundary, but only if it can't get any - packets out of the tail of the link to which it seeks. - \param _of The \c OggOpusFile in which to seek. - \param _byte_offset The byte position to seek to. - \return 0 on success, or a negative error code on failure. - \retval #OP_EREAD The underlying seek operation failed. - \retval #OP_EINVAL The stream was only partially open, or the target was - outside the valid range for the stream. - \retval #OP_ENOSEEK This stream is not seekable. - \retval #OP_EBADLINK Failed to initialize a decoder for a stream for an - unknown reason.*/ -int op_raw_seek(OggOpusFile *_of,opus_int64 _byte_offset) OP_ARG_NONNULL(1); - -/**Seek to the specified PCM offset, such that decoding will begin at exactly - the requested position. - \param _of The \c OggOpusFile in which to seek. - \param _pcm_offset The PCM offset to seek to. - This is in samples at 48 kHz relative to the start of the - stream. - \return 0 on success, or a negative value on error. - \retval #OP_EREAD An underlying read or seek operation failed. - \retval #OP_EINVAL The stream was only partially open, or the target was - outside the valid range for the stream. - \retval #OP_ENOSEEK This stream is not seekable. - \retval #OP_EBADLINK We failed to find data we had seen before, or the - bitstream structure was sufficiently malformed that - seeking to the target destination was impossible.*/ -int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset) OP_ARG_NONNULL(1); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_decoding Decoding*/ -/*@{*/ -/**\name Functions for decoding audio data - - These functions retrieve actual decoded audio data from the stream. - The general functions, op_read() and op_read_float() return 16-bit or - floating-point output, both using native endian ordering. - The number of channels returned can change from link to link in a chained - stream. - There are special functions, op_read_stereo() and op_read_float_stereo(), - which always output two channels, to simplify applications which do not - wish to handle multichannel audio. - These downmix multichannel files to two channels, so they can always return - samples in the same format for every link in a chained file. - - If the rest of your audio processing chain can handle floating point, the - floating-point routines should be preferred, as they prevent clipping and - other issues which might be avoided entirely if, e.g., you scale down the - volume at some other stage. - However, if you intend to consume 16-bit samples directly, the conversion in - libopusfile provides noise-shaping dithering and, if compiled - against libopus 1.1 or later, soft-clipping prevention. - - libopusfile can also be configured at compile time to use the - fixed-point libopus API. - If so, libopusfile's floating-point API may also be disabled. - In that configuration, nothing in libopusfile will use any - floating-point operations, to simplify support on devices without an - adequate FPU. - - \warning HTTPS streams may be be vulnerable to truncation attacks if you do - not check the error return code from op_read_float() or its associated - functions. - If the remote peer does not close the connection gracefully (with a TLS - "close notify" message), these functions will return #OP_EREAD instead of 0 - when they reach the end of the file. - If you are reading from an URL (particularly if seeking is not - supported), you should make sure to check for this error and warn the user - appropriately.*/ -/*@{*/ - -/**Indicates that the decoding callback should produce signed 16-bit - native-endian output samples.*/ -#define OP_DEC_FORMAT_SHORT (7008) -/**Indicates that the decoding callback should produce 32-bit native-endian - float samples.*/ -#define OP_DEC_FORMAT_FLOAT (7040) - -/**Indicates that the decoding callback did not decode anything, and that - libopusfile should decode normally instead.*/ -#define OP_DEC_USE_DEFAULT (6720) - -/**Called to decode an Opus packet. - This should invoke the functional equivalent of opus_multistream_decode() or - opus_multistream_decode_float(), except that it returns 0 on success - instead of the number of decoded samples (which is known a priori). - \param _ctx The application-provided callback context. - \param _decoder The decoder to use to decode the packet. - \param[out] _pcm The buffer to decode into. - This will always have enough room for \a _nchannels of - \a _nsamples samples, which should be placed into this - buffer interleaved. - \param _op The packet to decode. - This will always have its granule position set to a valid - value. - \param _nsamples The number of samples expected from the packet. - \param _nchannels The number of channels expected from the packet. - \param _format The desired sample output format. - This is either #OP_DEC_FORMAT_SHORT or - #OP_DEC_FORMAT_FLOAT. - \param _li The index of the link from which this packet was decoded. - \return A non-negative value on success, or a negative value on error. - Any error codes should be the same as those returned by - opus_multistream_decode() or opus_multistream_decode_float(). - Success codes are as follows: - \retval 0 Decoding was successful. - The application has filled the buffer with - exactly \a _nsamples*\a - _nchannels samples in the requested - format. - \retval #OP_DEC_USE_DEFAULT No decoding was done. - libopusfile should do the decoding - by itself instead.*/ -typedef int (*op_decode_cb_func)(void *_ctx,OpusMSDecoder *_decoder,void *_pcm, - const ogg_packet *_op,int _nsamples,int _nchannels,int _format,int _li); - -/**Sets the packet decode callback function. - If set, this is called once for each packet that needs to be decoded. - This can be used by advanced applications to do additional processing on the - compressed or uncompressed data. - For example, an application might save the final entropy coder state for - debugging and testing purposes, or it might apply additional filters - before the downmixing, dithering, or soft-clipping performed by - libopusfile, so long as these filters do not introduce any - latency. - - A call to this function is no guarantee that the audio will eventually be - delivered to the application. - libopusfile may discard some or all of the decoded audio data - (i.e., at the beginning or end of a link, or after a seek), however the - callback is still required to provide all of it. - \param _of The \c OggOpusFile on which to set the decode callback. - \param _decode_cb The callback function to call. - This may be NULL to disable calling the - callback. - \param _ctx The application-provided context pointer to pass to the - callback on each call.*/ -void op_set_decode_callback(OggOpusFile *_of, - op_decode_cb_func _decode_cb,void *_ctx) OP_ARG_NONNULL(1); - -/**Gain offset type that indicates that the provided offset is relative to the - header gain. - This is the default.*/ -#define OP_HEADER_GAIN (0) - -/**Gain offset type that indicates that the provided offset is relative to the - R128_ALBUM_GAIN value (if any), in addition to the header gain.*/ -#define OP_ALBUM_GAIN (3007) - -/**Gain offset type that indicates that the provided offset is relative to the - R128_TRACK_GAIN value (if any), in addition to the header gain.*/ -#define OP_TRACK_GAIN (3008) - -/**Gain offset type that indicates that the provided offset should be used as - the gain directly, without applying any the header or track gains.*/ -#define OP_ABSOLUTE_GAIN (3009) - -/**Sets the gain to be used for decoded output. - By default, the gain in the header is applied with no additional offset. - The total gain (including header gain and/or track gain, if applicable, and - this offset), will be clamped to [-32768,32767]/256 dB. - This is more than enough to saturate or underflow 16-bit PCM. - \note The new gain will not be applied to any already buffered, decoded - output. - This means you cannot change it sample-by-sample, as at best it will be - updated packet-by-packet. - It is meant for setting a target volume level, rather than applying smooth - fades, etc. - \param _of The \c OggOpusFile on which to set the gain offset. - \param _gain_type One of #OP_HEADER_GAIN, #OP_ALBUM_GAIN, - #OP_TRACK_GAIN, or #OP_ABSOLUTE_GAIN. - \param _gain_offset_q8 The gain offset to apply, in 1/256ths of a dB. - \return 0 on success or a negative value on error. - \retval #OP_EINVAL The \a _gain_type was unrecognized.*/ -int op_set_gain_offset(OggOpusFile *_of, - int _gain_type,opus_int32 _gain_offset_q8) OP_ARG_NONNULL(1); - -/**Sets whether or not dithering is enabled for 16-bit decoding. - By default, when libopusfile is compiled to use floating-point - internally, calling op_read() or op_read_stereo() will first decode to - float, and then convert to fixed-point using noise-shaping dithering. - This flag can be used to disable that dithering. - When the application uses op_read_float() or op_read_float_stereo(), or when - the library has been compiled to decode directly to fixed point, this flag - has no effect. - \param _of The \c OggOpusFile on which to enable or disable dithering. - \param _enabled A non-zero value to enable dithering, or 0 to disable it.*/ -void op_set_dither_enabled(OggOpusFile *_of,int _enabled) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream. - \note Although \a _buf_size must indicate the total number of values that - can be stored in \a _pcm, the return value is the number of samples - per channel. - This is done because -
      -
    1. The channel count cannot be known a priori (reading more samples might - advance us into the next link, with a different channel count), so - \a _buf_size cannot also be in units of samples per channel,
    2. -
    3. Returning the samples per channel matches the libopus API - as closely as we're able,
    4. -
    5. Returning the total number of values instead of samples per channel - would mean the caller would need a division to compute the samples per - channel, and might worry about the possibility of getting back samples - for some channels and not others, and
    6. -
    7. This approach is relatively fool-proof: if an application passes too - small a value to \a _buf_size, they will simply get fewer samples back, - and if they assume the return value is the total number of values, then - they will simply read too few (rather than reading too many and going - off the end of the buffer).
    8. -
    - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples, as - signed native-endian 16-bit values at 48 kHz - with a nominal range of [-32768,32767). - Multiple channels are interleaved using the - Vorbis - channel ordering. - This must have room for at least \a _buf_size values. - \param _buf_size The number of values that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (5760 - values per channel). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - libopusfile may return less data than - requested. - If so, there is no guarantee that the remaining data - in \a _pcm will be unmodified. - \param[out] _li The index of the link this data was decoded from. - You may pass NULL if you do not need this - information. - If this function fails (returning a negative value), - this parameter is left unset. - \return The number of samples read per channel on success, or a negative - value on failure. - The channel count can be retrieved on success by calling - op_head(_of,*_li). - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for all channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read(OggOpusFile *_of, - opus_int16 *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream. - \note Although \a _buf_size must indicate the total number of values that - can be stored in \a _pcm, the return value is the number of samples - per channel. -
      -
    1. The channel count cannot be known a priori (reading more samples might - advance us into the next link, with a different channel count), so - \a _buf_size cannot also be in units of samples per channel,
    2. -
    3. Returning the samples per channel matches the libopus API - as closely as we're able,
    4. -
    5. Returning the total number of values instead of samples per channel - would mean the caller would need a division to compute the samples per - channel, and might worry about the possibility of getting back samples - for some channels and not others, and
    6. -
    7. This approach is relatively fool-proof: if an application passes too - small a value to \a _buf_size, they will simply get fewer samples back, - and if they assume the return value is the total number of values, then - they will simply read too few (rather than reading too many and going - off the end of the buffer).
    8. -
    - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples as - signed floats at 48 kHz with a nominal range of - [-1.0,1.0]. - Multiple channels are interleaved using the - Vorbis - channel ordering. - This must have room for at least \a _buf_size floats. - \param _buf_size The number of floats that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (5760 - samples per channel). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - If less than \a _buf_size values are returned, - libopusfile makes no guarantee that the - remaining data in \a _pcm will be unmodified. - \param[out] _li The index of the link this data was decoded from. - You may pass NULL if you do not need this - information. - If this function fails (returning a negative value), - this parameter is left unset. - \return The number of samples read per channel on success, or a negative - value on failure. - The channel count can be retrieved on success by calling - op_head(_of,*_li). - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for all channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read_float(OggOpusFile *_of, - float *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream and downmixes to stereo, if necessary. - This function is intended for simple players that want a uniform output - format, even if the channel count changes between links in a chained - stream. - \note \a _buf_size indicates the total number of values that can be stored - in \a _pcm, while the return value is the number of samples per - channel, even though the channel count is known, for consistency with - op_read(). - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples, as - signed native-endian 16-bit values at 48 kHz - with a nominal range of [-32768,32767). - The left and right channels are interleaved in the - buffer. - This must have room for at least \a _buf_size values. - \param _buf_size The number of values that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (11520 - values total). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - If less than \a _buf_size values are returned, - libopusfile makes no guarantee that the - remaining data in \a _pcm will be unmodified. - \return The number of samples read per channel on success, or a negative - value on failure. - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for both channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read_stereo(OggOpusFile *_of, - opus_int16 *_pcm,int _buf_size) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream and downmixes to stereo, if necessary. - This function is intended for simple players that want a uniform output - format, even if the channel count changes between links in a chained - stream. - \note \a _buf_size indicates the total number of values that can be stored - in \a _pcm, while the return value is the number of samples per - channel, even though the channel count is known, for consistency with - op_read_float(). - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples, as - signed floats at 48 kHz with a nominal range of - [-1.0,1.0]. - The left and right channels are interleaved in the - buffer. - This must have room for at least \a _buf_size values. - \param _buf_size The number of values that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (11520 - values total). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - If less than \a _buf_size values are returned, - libopusfile makes no guarantee that the - remaining data in \a _pcm will be unmodified. - \return The number of samples read per channel on success, or a negative - value on failure. - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for both channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - that did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read_float_stereo(OggOpusFile *_of, - float *_pcm,int _buf_size) OP_ARG_NONNULL(1); - -/*@}*/ -/*@}*/ - -# if OP_GNUC_PREREQ(4,0) -# pragma GCC visibility pop -# endif - -# if defined(__cplusplus) -} -# endif - -#endif diff --git a/thirdparty/opus/opus_compare.c b/thirdparty/opus/opus_compare.c deleted file mode 100644 index 06c67d752f..0000000000 --- a/thirdparty/opus/opus_compare.c +++ /dev/null @@ -1,379 +0,0 @@ -/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation - Written by Jean-Marc Valin and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include -#include -#include - -#define OPUS_PI (3.14159265F) - -#define OPUS_COSF(_x) ((float)cos(_x)) -#define OPUS_SINF(_x) ((float)sin(_x)) - -static void *check_alloc(void *_ptr){ - if(_ptr==NULL){ - fprintf(stderr,"Out of memory.\n"); - exit(EXIT_FAILURE); - } - return _ptr; -} - -static void *opus_malloc(size_t _size){ - return check_alloc(malloc(_size)); -} - -static void *opus_realloc(void *_ptr,size_t _size){ - return check_alloc(realloc(_ptr,_size)); -} - -static size_t read_pcm16(float **_samples,FILE *_fin,int _nchannels){ - unsigned char buf[1024]; - float *samples; - size_t nsamples; - size_t csamples; - size_t xi; - size_t nread; - samples=NULL; - nsamples=csamples=0; - for(;;){ - nread=fread(buf,2*_nchannels,1024/(2*_nchannels),_fin); - if(nread<=0)break; - if(nsamples+nread>csamples){ - do csamples=csamples<<1|1; - while(nsamples+nread>csamples); - samples=(float *)opus_realloc(samples, - _nchannels*csamples*sizeof(*samples)); - } - for(xi=0;xi=_window_sz)ti-=_window_sz; - } - re*=_downsample; - im*=_downsample; - _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+100000; - p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci]; - } - } - if(_out){ - _out[(xi*_nbands+bi)*_nchannels]=p[0]/(_bands[bi+1]-_bands[bi]); - if(_nchannels==2){ - _out[(xi*_nbands+bi)*_nchannels+1]=p[1]/(_bands[bi+1]-_bands[bi]); - } - } - } - } - free(window); -} - -#define NBANDS (21) -#define NFREQS (240) - -/*Bands on which we compute the pseudo-NMR (Bark-derived - CELT bands).*/ -static const int BANDS[NBANDS+1]={ - 0,2,4,6,8,10,12,14,16,20,24,28,32,40,48,56,68,80,96,120,156,200 -}; - -#define TEST_WIN_SIZE (480) -#define TEST_WIN_STEP (120) - -int main(int _argc,const char **_argv){ - FILE *fin1; - FILE *fin2; - float *x; - float *y; - float *xb; - float *X; - float *Y; - double err; - float Q; - size_t xlength; - size_t ylength; - size_t nframes; - size_t xi; - int ci; - int xj; - int bi; - int nchannels; - unsigned rate; - int downsample; - int ybands; - int yfreqs; - int max_compare; - if(_argc<3||_argc>6){ - fprintf(stderr,"Usage: %s [-s] [-r rate2] \n", - _argv[0]); - return EXIT_FAILURE; - } - nchannels=1; - if(strcmp(_argv[1],"-s")==0){ - nchannels=2; - _argv++; - } - rate=48000; - ybands=NBANDS; - yfreqs=NFREQS; - downsample=1; - if(strcmp(_argv[1],"-r")==0){ - rate=atoi(_argv[2]); - if(rate!=8000&&rate!=12000&&rate!=16000&&rate!=24000&&rate!=48000){ - fprintf(stderr, - "Sampling rate must be 8000, 12000, 16000, 24000, or 48000\n"); - return EXIT_FAILURE; - } - downsample=48000/rate; - switch(rate){ - case 8000:ybands=13;break; - case 12000:ybands=15;break; - case 16000:ybands=17;break; - case 24000:ybands=19;break; - } - yfreqs=NFREQS/downsample; - _argv+=2; - } - fin1=fopen(_argv[1],"rb"); - if(fin1==NULL){ - fprintf(stderr,"Error opening '%s'.\n",_argv[1]); - return EXIT_FAILURE; - } - fin2=fopen(_argv[2],"rb"); - if(fin2==NULL){ - fprintf(stderr,"Error opening '%s'.\n",_argv[2]); - fclose(fin1); - return EXIT_FAILURE; - } - /*Read in the data and allocate scratch space.*/ - xlength=read_pcm16(&x,fin1,2); - if(nchannels==1){ - for(xi=0;xi0;){ - for(ci=0;ci0){ - /*Temporal masking: -3 dB/2.5ms slope.*/ - for(bi=0;bi=79&&xj<=81)im*=0.1F; - if(xj==80)im*=0.1F; - Eb+=im; - } - } - Eb /= (BANDS[bi+1]-BANDS[bi])*nchannels; - Ef += Eb*Eb; - } - /*Using a fixed normalization value means we're willing to accept slightly - lower quality for lower sampling rates.*/ - Ef/=NBANDS; - Ef*=Ef; - err+=Ef*Ef; - } - err=pow(err/nframes,1.0/16); - Q=100*(1-0.5*log(1+err)/log(1.13)); - if(Q<0){ - fprintf(stderr,"Test vector FAILS\n"); - fprintf(stderr,"Internal weighted error is %f\n",err); - return EXIT_FAILURE; - } - else{ - fprintf(stderr,"Test vector PASSES\n"); - fprintf(stderr, - "Opus quality metric: %.1f %% (internal weighted error is %f)\n",Q,err); - return EXIT_SUCCESS; - } -} diff --git a/thirdparty/opus/opus_decoder.c b/thirdparty/opus/opus_decoder.c deleted file mode 100644 index 080bec5072..0000000000 --- a/thirdparty/opus/opus_decoder.c +++ /dev/null @@ -1,981 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#ifndef OPUS_BUILD -# error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details." -#endif - -#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) && !defined(OPUS_WILL_BE_SLOW) -# pragma message "You appear to be compiling without optimization, if so opus will be very slow." -#endif - -#include -#include "celt.h" -#include "opus.h" -#include "entdec.h" -#include "modes.h" -#include "API.h" -#include "stack_alloc.h" -#include "float_cast.h" -#include "opus_private.h" -#include "os_support.h" -#include "structs.h" -#include "define.h" -#include "mathops.h" -#include "cpu_support.h" - -struct OpusDecoder { - int celt_dec_offset; - int silk_dec_offset; - int channels; - opus_int32 Fs; /** Sampling rate (at the API level) */ - silk_DecControlStruct DecControl; - int decode_gain; - int arch; - - /* Everything beyond this point gets cleared on a reset */ -#define OPUS_DECODER_RESET_START stream_channels - int stream_channels; - - int bandwidth; - int mode; - int prev_mode; - int frame_size; - int prev_redundancy; - int last_packet_duration; -#ifndef FIXED_POINT - opus_val16 softclip_mem[2]; -#endif - - opus_uint32 rangeFinal; -}; - - -int opus_decoder_get_size(int channels) -{ - int silkDecSizeBytes, celtDecSizeBytes; - int ret; - if (channels<1 || channels > 2) - return 0; - ret = silk_Get_Decoder_Size( &silkDecSizeBytes ); - if(ret) - return 0; - silkDecSizeBytes = align(silkDecSizeBytes); - celtDecSizeBytes = celt_decoder_get_size(channels); - return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes; -} - -int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) -{ - void *silk_dec; - CELTDecoder *celt_dec; - int ret, silkDecSizeBytes; - - if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000) - || (channels!=1&&channels!=2)) - return OPUS_BAD_ARG; - - OPUS_CLEAR((char*)st, opus_decoder_get_size(channels)); - /* Initialize SILK encoder */ - ret = silk_Get_Decoder_Size(&silkDecSizeBytes); - if (ret) - return OPUS_INTERNAL_ERROR; - - silkDecSizeBytes = align(silkDecSizeBytes); - st->silk_dec_offset = align(sizeof(OpusDecoder)); - st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes; - silk_dec = (char*)st+st->silk_dec_offset; - celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); - st->stream_channels = st->channels = channels; - - st->Fs = Fs; - st->DecControl.API_sampleRate = st->Fs; - st->DecControl.nChannelsAPI = st->channels; - - /* Reset decoder */ - ret = silk_InitDecoder( silk_dec ); - if(ret)return OPUS_INTERNAL_ERROR; - - /* Initialize CELT decoder */ - ret = celt_decoder_init(celt_dec, Fs, channels); - if(ret!=OPUS_OK)return OPUS_INTERNAL_ERROR; - - celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0)); - - st->prev_mode = 0; - st->frame_size = Fs/400; - st->arch = opus_select_arch(); - return OPUS_OK; -} - -OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error) -{ - int ret; - OpusDecoder *st; - if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000) - || (channels!=1&&channels!=2)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels)); - if (st == NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_decoder_init(st, Fs, channels); - if (error) - *error = ret; - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - return st; -} - -static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2, - opus_val16 *out, int overlap, int channels, - const opus_val16 *window, opus_int32 Fs) -{ - int i, c; - int inc = 48000/Fs; - for (c=0;csilk_dec_offset; - celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); - F20 = st->Fs/50; - F10 = F20>>1; - F5 = F10>>1; - F2_5 = F5>>1; - if (frame_size < F2_5) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - /* Limit frame_size to avoid excessive stack allocations. */ - frame_size = IMIN(frame_size, st->Fs/25*3); - /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */ - if (len<=1) - { - data = NULL; - /* In that case, don't conceal more than what the ToC says */ - frame_size = IMIN(frame_size, st->frame_size); - } - if (data != NULL) - { - audiosize = st->frame_size; - mode = st->mode; - ec_dec_init(&dec,(unsigned char*)data,len); - } else { - audiosize = frame_size; - mode = st->prev_mode; - - if (mode == 0) - { - /* If we haven't got any packet yet, all we can do is return zeros */ - for (i=0;ichannels;i++) - pcm[i] = 0; - RESTORE_STACK; - return audiosize; - } - - /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT), - 10, or 20 (e.g. 12.5 or 30 ms). */ - if (audiosize > F20) - { - do { - int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0); - if (ret<0) - { - RESTORE_STACK; - return ret; - } - pcm += ret*st->channels; - audiosize -= ret; - } while (audiosize > 0); - RESTORE_STACK; - return frame_size; - } else if (audiosize < F20) - { - if (audiosize > F10) - audiosize = F10; - else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10) - audiosize = F5; - } - } - - /* In fixed-point, we can tell CELT to do the accumulation on top of the - SILK PCM buffer. This saves some stack space. */ -#ifdef FIXED_POINT - celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); -#else - celt_accum = 0; -#endif - - pcm_transition_silk_size = ALLOC_NONE; - pcm_transition_celt_size = ALLOC_NONE; - if (data!=NULL && st->prev_mode > 0 && ( - (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy) - || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ) - ) - { - transition = 1; - /* Decide where to allocate the stack memory for pcm_transition */ - if (mode == MODE_CELT_ONLY) - pcm_transition_celt_size = F5*st->channels; - else - pcm_transition_silk_size = F5*st->channels; - } - ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16); - if (transition && mode == MODE_CELT_ONLY) - { - pcm_transition = pcm_transition_celt; - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); - } - if (audiosize > frame_size) - { - /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/ - RESTORE_STACK; - return OPUS_BAD_ARG; - } else { - frame_size = audiosize; - } - - /* Don't allocate any memory when in CELT-only mode */ - pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; - ALLOC(pcm_silk, pcm_silk_size, opus_int16); - - /* SILK processing */ - if (mode != MODE_CELT_ONLY) - { - int lost_flag, decoded_samples; - opus_int16 *pcm_ptr; -#ifdef FIXED_POINT - if (celt_accum) - pcm_ptr = pcm; - else -#endif - pcm_ptr = pcm_silk; - - if (st->prev_mode==MODE_CELT_ONLY) - silk_InitDecoder( silk_dec ); - - /* The SILK PLC cannot produce frames of less than 10 ms */ - st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); - - if (data != NULL) - { - st->DecControl.nChannelsInternal = st->stream_channels; - if( mode == MODE_SILK_ONLY ) { - if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) { - st->DecControl.internalSampleRate = 8000; - } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) { - st->DecControl.internalSampleRate = 12000; - } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) { - st->DecControl.internalSampleRate = 16000; - } else { - st->DecControl.internalSampleRate = 16000; - silk_assert( 0 ); - } - } else { - /* Hybrid mode */ - st->DecControl.internalSampleRate = 16000; - } - } - - lost_flag = data == NULL ? 1 : 2 * decode_fec; - decoded_samples = 0; - do { - /* Call SILK decoder */ - int first_frame = decoded_samples == 0; - silk_ret = silk_Decode( silk_dec, &st->DecControl, - lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch ); - if( silk_ret ) { - if (lost_flag) { - /* PLC failure should not be fatal */ - silk_frame_size = frame_size; - for (i=0;ichannels;i++) - pcm_ptr[i] = 0; - } else { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - pcm_ptr += silk_frame_size * st->channels; - decoded_samples += silk_frame_size; - } while( decoded_samples < frame_size ); - } - - start_band = 0; - if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL - && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len) - { - /* Check if we have a redundant 0-8 kHz band */ - if (mode == MODE_HYBRID) - redundancy = ec_dec_bit_logp(&dec, 12); - else - redundancy = 1; - if (redundancy) - { - celt_to_silk = ec_dec_bit_logp(&dec, 1); - /* redundancy_bytes will be at least two, in the non-hybrid - case due to the ec_tell() check above */ - redundancy_bytes = mode==MODE_HYBRID ? - (opus_int32)ec_dec_uint(&dec, 256)+2 : - len-((ec_tell(&dec)+7)>>3); - len -= redundancy_bytes; - /* This is a sanity check. It should never happen for a valid - packet, so the exact behaviour is not normative. */ - if (len*8 < ec_tell(&dec)) - { - len = 0; - redundancy_bytes = 0; - redundancy = 0; - } - /* Shrink decoder because of raw bits */ - dec.storage -= redundancy_bytes; - } - } - if (mode != MODE_CELT_ONLY) - start_band = 17; - - { - int endband=21; - - switch(st->bandwidth) - { - case OPUS_BANDWIDTH_NARROWBAND: - endband = 13; - break; - case OPUS_BANDWIDTH_MEDIUMBAND: - case OPUS_BANDWIDTH_WIDEBAND: - endband = 17; - break; - case OPUS_BANDWIDTH_SUPERWIDEBAND: - endband = 19; - break; - case OPUS_BANDWIDTH_FULLBAND: - endband = 21; - break; - } - celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband)); - celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels)); - } - - if (redundancy) - { - transition = 0; - pcm_transition_silk_size=ALLOC_NONE; - } - - ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); - - if (transition && mode != MODE_CELT_ONLY) - { - pcm_transition = pcm_transition_silk; - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); - } - - /* Only allocation memory for redundancy if/when needed */ - redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE; - ALLOC(redundant_audio, redundant_audio_size, opus_val16); - - /* 5 ms redundant frame for CELT->SILK*/ - if (redundancy && celt_to_silk) - { - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, - redundant_audio, F5, NULL, 0); - celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); - } - - /* MUST be after PLC */ - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band)); - - if (mode != MODE_SILK_ONLY) - { - int celt_frame_size = IMIN(F20, frame_size); - /* Make sure to discard any previous CELT state */ - if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy) - celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - /* Decode CELT */ - celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, - len, pcm, celt_frame_size, &dec, celt_accum); - } else { - unsigned char silence[2] = {0xFF, 0xFF}; - if (!celt_accum) - { - for (i=0;ichannels;i++) - pcm[i] = 0; - } - /* For hybrid -> SILK transitions, we let the CELT MDCT - do a fade-out by decoding a silence frame */ - if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) - { - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); - } - } - - if (mode != MODE_CELT_ONLY && !celt_accum) - { -#ifdef FIXED_POINT - for (i=0;ichannels;i++) - pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); -#else - for (i=0;ichannels;i++) - pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); -#endif - } - - { - const CELTMode *celt_mode; - celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)); - window = celt_mode->window; - } - - /* 5 ms redundant frame for SILK->CELT */ - if (redundancy && !celt_to_silk) - { - celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); - celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); - smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, - pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); - } - if (redundancy && celt_to_silk) - { - for (c=0;cchannels;c++) - { - for (i=0;ichannels*i+c] = redundant_audio[st->channels*i+c]; - } - smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5, - pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs); - } - if (transition) - { - if (audiosize >= F5) - { - for (i=0;ichannels*F2_5;i++) - pcm[i] = pcm_transition[i]; - smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5, - pcm+st->channels*F2_5, F2_5, - st->channels, window, st->Fs); - } else { - /* Not enough time to do a clean transition, but we do it anyway - This will not preserve amplitude perfectly and may introduce - a bit of temporal aliasing, but it shouldn't be too bad and - that's pretty much the best we can do. In any case, generating this - transition it pretty silly in the first place */ - smooth_fade(pcm_transition, pcm, - pcm, F2_5, - st->channels, window, st->Fs); - } - } - - if(st->decode_gain) - { - opus_val32 gain; - gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain)); - for (i=0;ichannels;i++) - { - opus_val32 x; - x = MULT16_32_P16(pcm[i],gain); - pcm[i] = SATURATE(x, 32767); - } - } - - if (len <= 1) - st->rangeFinal = 0; - else - st->rangeFinal = dec.rng ^ redundant_rng; - - st->prev_mode = mode; - st->prev_redundancy = redundancy && !celt_to_silk; - - if (celt_ret>=0) - { - if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels)) - OPUS_PRINT_INT(audiosize); - } - - RESTORE_STACK; - return celt_ret < 0 ? celt_ret : audiosize; - -} - -int opus_decode_native(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, - int self_delimited, opus_int32 *packet_offset, int soft_clip) -{ - int i, nb_samples; - int count, offset; - unsigned char toc; - int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels; - /* 48 x 2.5 ms = 120 ms */ - opus_int16 size[48]; - if (decode_fec<0 || decode_fec>1) - return OPUS_BAD_ARG; - /* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */ - if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0) - return OPUS_BAD_ARG; - if (len==0 || data==NULL) - { - int pcm_count=0; - do { - int ret; - ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0); - if (ret<0) - return ret; - pcm_count += ret; - } while (pcm_count < frame_size); - celt_assert(pcm_count == frame_size); - if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels)) - OPUS_PRINT_INT(pcm_count); - st->last_packet_duration = pcm_count; - return pcm_count; - } else if (len<0) - return OPUS_BAD_ARG; - - packet_mode = opus_packet_get_mode(data); - packet_bandwidth = opus_packet_get_bandwidth(data); - packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs); - packet_stream_channels = opus_packet_get_nb_channels(data); - - count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, - size, &offset, packet_offset); - if (count<0) - return count; - - data += offset; - - if (decode_fec) - { - int duration_copy; - int ret; - /* If no FEC can be present, run the PLC (recursive call) */ - if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY) - return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip); - /* Otherwise, run the PLC on everything except the size for which we might have FEC */ - duration_copy = st->last_packet_duration; - if (frame_size-packet_frame_size!=0) - { - ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip); - if (ret<0) - { - st->last_packet_duration = duration_copy; - return ret; - } - celt_assert(ret==frame_size-packet_frame_size); - } - /* Complete with FEC */ - st->mode = packet_mode; - st->bandwidth = packet_bandwidth; - st->frame_size = packet_frame_size; - st->stream_channels = packet_stream_channels; - ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size), - packet_frame_size, 1); - if (ret<0) - return ret; - else { - if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels)) - OPUS_PRINT_INT(frame_size); - st->last_packet_duration = frame_size; - return frame_size; - } - } - - if (count*packet_frame_size > frame_size) - return OPUS_BUFFER_TOO_SMALL; - - /* Update the state as the last step to avoid updating it on an invalid packet */ - st->mode = packet_mode; - st->bandwidth = packet_bandwidth; - st->frame_size = packet_frame_size; - st->stream_channels = packet_stream_channels; - - nb_samples=0; - for (i=0;ichannels, frame_size-nb_samples, 0); - if (ret<0) - return ret; - celt_assert(ret==packet_frame_size); - data += size[i]; - nb_samples += ret; - } - st->last_packet_duration = nb_samples; - if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels)) - OPUS_PRINT_INT(nb_samples); -#ifndef FIXED_POINT - if (soft_clip) - opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem); - else - st->softclip_mem[0]=st->softclip_mem[1]=0; -#endif - return nb_samples; -} - -#ifdef FIXED_POINT - -int opus_decode(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) -{ - if(frame_size<=0) - return OPUS_BAD_ARG; - return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); -} - -#ifndef DISABLE_FLOAT_API -int opus_decode_float(OpusDecoder *st, const unsigned char *data, - opus_int32 len, float *pcm, int frame_size, int decode_fec) -{ - VARDECL(opus_int16, out); - int ret, i; - int nb_samples; - ALLOC_STACK; - - if(frame_size<=0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - if (data != NULL && len > 0 && !decode_fec) - { - nb_samples = opus_decoder_get_nb_samples(st, data, len); - if (nb_samples>0) - frame_size = IMIN(frame_size, nb_samples); - else - return OPUS_INVALID_PACKET; - } - ALLOC(out, frame_size*st->channels, opus_int16); - - ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); - if (ret > 0) - { - for (i=0;ichannels;i++) - pcm[i] = (1.f/32768.f)*(out[i]); - } - RESTORE_STACK; - return ret; -} -#endif - - -#else -int opus_decode(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec) -{ - VARDECL(float, out); - int ret, i; - int nb_samples; - ALLOC_STACK; - - if(frame_size<=0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - if (data != NULL && len > 0 && !decode_fec) - { - nb_samples = opus_decoder_get_nb_samples(st, data, len); - if (nb_samples>0) - frame_size = IMIN(frame_size, nb_samples); - else - return OPUS_INVALID_PACKET; - } - ALLOC(out, frame_size*st->channels, float); - - ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); - if (ret > 0) - { - for (i=0;ichannels;i++) - pcm[i] = FLOAT2INT16(out[i]); - } - RESTORE_STACK; - return ret; -} - -int opus_decode_float(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) -{ - if(frame_size<=0) - return OPUS_BAD_ARG; - return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); -} - -#endif - -int opus_decoder_ctl(OpusDecoder *st, int request, ...) -{ - int ret = OPUS_OK; - va_list ap; - void *silk_dec; - CELTDecoder *celt_dec; - - silk_dec = (char*)st+st->silk_dec_offset; - celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); - - - va_start(ap, request); - - switch (request) - { - case OPUS_GET_BANDWIDTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->bandwidth; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 *value = va_arg(ap, opus_uint32*); - if (!value) - { - goto bad_arg; - } - *value = st->rangeFinal; - } - break; - case OPUS_RESET_STATE: - { - OPUS_CLEAR((char*)&st->OPUS_DECODER_RESET_START, - sizeof(OpusDecoder)- - ((char*)&st->OPUS_DECODER_RESET_START - (char*)st)); - - celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - silk_InitDecoder( silk_dec ); - st->stream_channels = st->channels; - st->frame_size = st->Fs/400; - } - break; - case OPUS_GET_SAMPLE_RATE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->Fs; - } - break; - case OPUS_GET_PITCH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - if (st->prev_mode == MODE_CELT_ONLY) - celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); - else - *value = st->DecControl.prevPitchLag; - } - break; - case OPUS_GET_GAIN_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->decode_gain; - } - break; - case OPUS_SET_GAIN_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<-32768 || value>32767) - { - goto bad_arg; - } - st->decode_gain = value; - } - break; - case OPUS_GET_LAST_PACKET_DURATION_REQUEST: - { - opus_uint32 *value = va_arg(ap, opus_uint32*); - if (!value) - { - goto bad_arg; - } - *value = st->last_packet_duration; - } - break; - default: - /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/ - ret = OPUS_UNIMPLEMENTED; - break; - } - - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - -void opus_decoder_destroy(OpusDecoder *st) -{ - opus_free(st); -} - - -int opus_packet_get_bandwidth(const unsigned char *data) -{ - int bandwidth; - if (data[0]&0x80) - { - bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3); - if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - bandwidth = OPUS_BANDWIDTH_NARROWBAND; - } else if ((data[0]&0x60) == 0x60) - { - bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND : - OPUS_BANDWIDTH_SUPERWIDEBAND; - } else { - bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3); - } - return bandwidth; -} - -int opus_packet_get_nb_channels(const unsigned char *data) -{ - return (data[0]&0x4) ? 2 : 1; -} - -int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) -{ - int count; - if (len<1) - return OPUS_BAD_ARG; - count = packet[0]&0x3; - if (count==0) - return 1; - else if (count!=3) - return 2; - else if (len<2) - return OPUS_INVALID_PACKET; - else - return packet[1]&0x3F; -} - -int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, - opus_int32 Fs) -{ - int samples; - int count = opus_packet_get_nb_frames(packet, len); - - if (count<0) - return count; - - samples = count*opus_packet_get_samples_per_frame(packet, Fs); - /* Can't have more than 120 ms */ - if (samples*25 > Fs*3) - return OPUS_INVALID_PACKET; - else - return samples; -} - -int opus_decoder_get_nb_samples(const OpusDecoder *dec, - const unsigned char packet[], opus_int32 len) -{ - return opus_packet_get_nb_samples(packet, len, dec->Fs); -} diff --git a/thirdparty/opus/opus_encoder.c b/thirdparty/opus/opus_encoder.c deleted file mode 100644 index 9a516a884a..0000000000 --- a/thirdparty/opus/opus_encoder.c +++ /dev/null @@ -1,2536 +0,0 @@ -/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "celt.h" -#include "entenc.h" -#include "modes.h" -#include "API.h" -#include "stack_alloc.h" -#include "float_cast.h" -#include "opus.h" -#include "arch.h" -#include "pitch.h" -#include "opus_private.h" -#include "os_support.h" -#include "cpu_support.h" -#include "analysis.h" -#include "mathops.h" -#include "tuning_parameters.h" -#ifdef FIXED_POINT -#include "fixed/structs_FIX.h" -#else -#include "float/structs_FLP.h" -#endif - -#define MAX_ENCODER_BUFFER 480 - -typedef struct { - opus_val32 XX, XY, YY; - opus_val16 smoothed_width; - opus_val16 max_follower; -} StereoWidthState; - -struct OpusEncoder { - int celt_enc_offset; - int silk_enc_offset; - silk_EncControlStruct silk_mode; - int application; - int channels; - int delay_compensation; - int force_channels; - int signal_type; - int user_bandwidth; - int max_bandwidth; - int user_forced_mode; - int voice_ratio; - opus_int32 Fs; - int use_vbr; - int vbr_constraint; - int variable_duration; - opus_int32 bitrate_bps; - opus_int32 user_bitrate_bps; - int lsb_depth; - int encoder_buffer; - int lfe; - int arch; -#ifndef DISABLE_FLOAT_API - TonalityAnalysisState analysis; -#endif - -#define OPUS_ENCODER_RESET_START stream_channels - int stream_channels; - opus_int16 hybrid_stereo_width_Q14; - opus_int32 variable_HP_smth2_Q15; - opus_val16 prev_HB_gain; - opus_val32 hp_mem[4]; - int mode; - int prev_mode; - int prev_channels; - int prev_framesize; - int bandwidth; - int silk_bw_switch; - /* Sampling rate (at the API level) */ - int first; - opus_val16 * energy_masking; - StereoWidthState width_mem; - opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; -#ifndef DISABLE_FLOAT_API - int detected_bandwidth; -#endif - opus_uint32 rangeFinal; -}; - -/* Transition tables for the voice and music. First column is the - middle (memoriless) threshold. The second column is the hysteresis - (difference with the middle) */ -static const opus_int32 mono_voice_bandwidth_thresholds[8] = { - 11000, 1000, /* NB<->MB */ - 14000, 1000, /* MB<->WB */ - 17000, 1000, /* WB<->SWB */ - 21000, 2000, /* SWB<->FB */ -}; -static const opus_int32 mono_music_bandwidth_thresholds[8] = { - 12000, 1000, /* NB<->MB */ - 15000, 1000, /* MB<->WB */ - 18000, 2000, /* WB<->SWB */ - 22000, 2000, /* SWB<->FB */ -}; -static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { - 11000, 1000, /* NB<->MB */ - 14000, 1000, /* MB<->WB */ - 21000, 2000, /* WB<->SWB */ - 28000, 2000, /* SWB<->FB */ -}; -static const opus_int32 stereo_music_bandwidth_thresholds[8] = { - 12000, 1000, /* NB<->MB */ - 18000, 2000, /* MB<->WB */ - 21000, 2000, /* WB<->SWB */ - 30000, 2000, /* SWB<->FB */ -}; -/* Threshold bit-rates for switching between mono and stereo */ -static const opus_int32 stereo_voice_threshold = 30000; -static const opus_int32 stereo_music_threshold = 30000; - -/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ -static const opus_int32 mode_thresholds[2][2] = { - /* voice */ /* music */ - { 64000, 16000}, /* mono */ - { 36000, 16000}, /* stereo */ -}; - -int opus_encoder_get_size(int channels) -{ - int silkEncSizeBytes, celtEncSizeBytes; - int ret; - if (channels<1 || channels > 2) - return 0; - ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); - if (ret) - return 0; - silkEncSizeBytes = align(silkEncSizeBytes); - celtEncSizeBytes = celt_encoder_get_size(channels); - return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes; -} - -int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application) -{ - void *silk_enc; - CELTEncoder *celt_enc; - int err; - int ret, silkEncSizeBytes; - - if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| - (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO - && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) - return OPUS_BAD_ARG; - - OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); - /* Create SILK encoder */ - ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); - if (ret) - return OPUS_BAD_ARG; - silkEncSizeBytes = align(silkEncSizeBytes); - st->silk_enc_offset = align(sizeof(OpusEncoder)); - st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; - silk_enc = (char*)st+st->silk_enc_offset; - celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); - - st->stream_channels = st->channels = channels; - - st->Fs = Fs; - - st->arch = opus_select_arch(); - - ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode ); - if(ret)return OPUS_INTERNAL_ERROR; - - /* default SILK parameters */ - st->silk_mode.nChannelsAPI = channels; - st->silk_mode.nChannelsInternal = channels; - st->silk_mode.API_sampleRate = st->Fs; - st->silk_mode.maxInternalSampleRate = 16000; - st->silk_mode.minInternalSampleRate = 8000; - st->silk_mode.desiredInternalSampleRate = 16000; - st->silk_mode.payloadSize_ms = 20; - st->silk_mode.bitRate = 25000; - st->silk_mode.packetLossPercentage = 0; - st->silk_mode.complexity = 9; - st->silk_mode.useInBandFEC = 0; - st->silk_mode.useDTX = 0; - st->silk_mode.useCBR = 0; - st->silk_mode.reducedDependency = 0; - - /* Create CELT encoder */ - /* Initialize CELT encoder */ - err = celt_encoder_init(celt_enc, Fs, channels, st->arch); - if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; - - celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); - - st->use_vbr = 1; - /* Makes constrained VBR the default (safer for real-time use) */ - st->vbr_constraint = 1; - st->user_bitrate_bps = OPUS_AUTO; - st->bitrate_bps = 3000+Fs*channels; - st->application = application; - st->signal_type = OPUS_AUTO; - st->user_bandwidth = OPUS_AUTO; - st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND; - st->force_channels = OPUS_AUTO; - st->user_forced_mode = OPUS_AUTO; - st->voice_ratio = -1; - st->encoder_buffer = st->Fs/100; - st->lsb_depth = 24; - st->variable_duration = OPUS_FRAMESIZE_ARG; - - /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead - + 1.5 ms for SILK resamplers and stereo prediction) */ - st->delay_compensation = st->Fs/250; - - st->hybrid_stereo_width_Q14 = 1 << 14; - st->prev_HB_gain = Q15ONE; - st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); - st->first = 1; - st->mode = MODE_HYBRID; - st->bandwidth = OPUS_BANDWIDTH_FULLBAND; - -#ifndef DISABLE_FLOAT_API - tonality_analysis_init(&st->analysis); -#endif - - return OPUS_OK; -} - -static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels) -{ - int period; - unsigned char toc; - period = 0; - while (framerate < 400) - { - framerate <<= 1; - period++; - } - if (mode == MODE_SILK_ONLY) - { - toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5; - toc |= (period-2)<<3; - } else if (mode == MODE_CELT_ONLY) - { - int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND; - if (tmp < 0) - tmp = 0; - toc = 0x80; - toc |= tmp << 5; - toc |= period<<3; - } else /* Hybrid */ - { - toc = 0x60; - toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4; - toc |= (period-2)<<3; - } - toc |= (channels==2)<<2; - return toc; -} - -#ifndef FIXED_POINT -static void silk_biquad_float( - const opus_val16 *in, /* I: Input signal */ - const opus_int32 *B_Q28, /* I: MA coefficients [3] */ - const opus_int32 *A_Q28, /* I: AR coefficients [2] */ - opus_val32 *S, /* I/O: State vector [2] */ - opus_val16 *out, /* O: Output signal */ - const opus_int32 len, /* I: Signal length (must be even) */ - int stride -) -{ - /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ - opus_int k; - opus_val32 vout; - opus_val32 inval; - opus_val32 A[2], B[3]; - - A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28))); - A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28))); - B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28))); - B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28))); - B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28))); - - /* Negate A_Q28 values and split in two parts */ - - for( k = 0; k < len; k++ ) { - /* S[ 0 ], S[ 1 ]: Q12 */ - inval = in[ k*stride ]; - vout = S[ 0 ] + B[0]*inval; - - S[ 0 ] = S[1] - vout*A[0] + B[1]*inval; - - S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL; - - /* Scale back to Q0 and saturate */ - out[ k*stride ] = vout; - } -} -#endif - -static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) -{ - opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; - opus_int32 Fc_Q19, r_Q28, r_Q22; - - silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); - Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); - silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 ); - - r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); - - /* b = r * [ 1; -2; 1 ]; */ - /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ - B_Q28[ 0 ] = r_Q28; - B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 ); - B_Q28[ 2 ] = r_Q28; - - /* -r * ( 2 - Fc * Fc ); */ - r_Q22 = silk_RSHIFT( r_Q28, 6 ); - A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) ); - A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); - -#ifdef FIXED_POINT - silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels ); - if( channels == 2 ) { - silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); - } -#else - silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); - if( channels == 2 ) { - silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); - } -#endif -} - -#ifdef FIXED_POINT -static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) -{ - int c, i; - int shift; - - /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ - shift=celt_ilog2(Fs/(cutoff_Hz*3)); - for (c=0;cFs/400; - if (st->user_bitrate_bps==OPUS_AUTO) - return 60*st->Fs/frame_size + st->Fs*st->channels; - else if (st->user_bitrate_bps==OPUS_BITRATE_MAX) - return max_data_bytes*8*st->Fs/frame_size; - else - return st->user_bitrate_bps; -} - -#ifndef DISABLE_FLOAT_API -/* Don't use more than 60 ms for the frame size analysis */ -#define MAX_DYNAMIC_FRAMESIZE 24 -/* Estimates how much the bitrate will be boosted based on the sub-frame energy */ -static float transient_boost(const float *E, const float *E_1, int LM, int maxM) -{ - int i; - int M; - float sumE=0, sumE_1=0; - float metric; - - M = IMIN(maxM, (1<10 ? 1 : 0;*/ - /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ - return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); -} - -/* Viterbi decoding trying to find the best frame size combination using look-ahead - - State numbering: - 0: unused - 1: 2.5 ms - 2: 5 ms (#1) - 3: 5 ms (#2) - 4: 10 ms (#1) - 5: 10 ms (#2) - 6: 10 ms (#3) - 7: 10 ms (#4) - 8: 20 ms (#1) - 9: 20 ms (#2) - 10: 20 ms (#3) - 11: 20 ms (#4) - 12: 20 ms (#5) - 13: 20 ms (#6) - 14: 20 ms (#7) - 15: 20 ms (#8) -*/ -static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) -{ - int i; - float cost[MAX_DYNAMIC_FRAMESIZE][16]; - int states[MAX_DYNAMIC_FRAMESIZE][16]; - float best_cost; - int best_state; - float factor; - /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ - if (rate<80) - factor=0; - else if (rate>160) - factor=1; - else - factor = (rate-80.f)/80.f; - /* Makes variable framesize less aggressive at lower bitrates, but I can't - find any valid theoretical justification for this (other than it seems - to help) */ - for (i=0;i<16;i++) - { - /* Impossible state */ - states[0][i] = -1; - cost[0][i] = 1e10; - } - for (i=0;i<4;i++) - { - cost[0][1<=0;i--) - { - /*printf("%d ", best_state);*/ - best_state = states[i][best_state]; - } - /*printf("%d\n", best_state);*/ - return best_state; -} - -static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs, - int bitrate, opus_val16 tonality, float *mem, int buffering, - downmix_func downmix) -{ - int N; - int i; - float e[MAX_DYNAMIC_FRAMESIZE+4]; - float e_1[MAX_DYNAMIC_FRAMESIZE+3]; - opus_val32 memx; - int bestLM=0; - int subframe; - int pos; - int offset; - VARDECL(opus_val32, sub); - - subframe = Fs/400; - ALLOC(sub, subframe, opus_val32); - e[0]=mem[0]; - e_1[0]=1.f/(EPSILON+mem[0]); - if (buffering) - { - /* Consider the CELT delay when not in restricted-lowdelay */ - /* We assume the buffering is between 2.5 and 5 ms */ - offset = 2*subframe - buffering; - celt_assert(offset>=0 && offset <= subframe); - len -= offset; - e[1]=mem[1]; - e_1[1]=1.f/(EPSILON+mem[1]); - e[2]=mem[2]; - e_1[2]=1.f/(EPSILON+mem[2]); - pos = 3; - } else { - pos=1; - offset=0; - } - N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); - /* Just silencing a warning, it's really initialized later */ - memx = 0; - for (i=0;i-1) - { - for (j=0;j-1) - { - for (j=0;j= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) - new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); - else - return -1; - if (new_size>frame_size) - return -1; - if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && - 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) - return -1; - return new_size; -} - -opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, - int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, downmix_func downmix -#ifndef DISABLE_FLOAT_API - , float *subframe_mem -#endif - ) -{ -#ifndef DISABLE_FLOAT_API - if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) - { - int LM = 3; - LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, - 0, subframe_mem, delay_compensation, downmix); - while ((Fs/400<frame_size) - LM--; - frame_size = (Fs/400<XX += MULT16_32_Q15(short_alpha, xx-mem->XX); - mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); - mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); - mem->XX = MAX32(0, mem->XX); - mem->XY = MAX32(0, mem->XY); - mem->YY = MAX32(0, mem->YY); - if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18)) - { - opus_val16 corr; - opus_val16 ldiff; - opus_val16 width; - sqrt_xx = celt_sqrt(mem->XX); - sqrt_yy = celt_sqrt(mem->YY); - qrrt_xx = celt_sqrt(sqrt_xx); - qrrt_yy = celt_sqrt(sqrt_yy); - /* Inter-channel correlation */ - mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy); - corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16); - /* Approximate loudness difference */ - ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy); - width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff); - /* Smoothing over one second */ - mem->smoothed_width += (width-mem->smoothed_width)/frame_rate; - /* Peak follower */ - mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width); - } - /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/ - return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower))); -} - -opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, - int analysis_channels, downmix_func downmix, int float_api) -{ - void *silk_enc; - CELTEncoder *celt_enc; - int i; - int ret=0; - opus_int32 nBytes; - ec_enc enc; - int bytes_target; - int prefill=0; - int start_band = 0; - int redundancy = 0; - int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */ - int celt_to_silk = 0; - VARDECL(opus_val16, pcm_buf); - int nb_compr_bytes; - int to_celt = 0; - opus_uint32 redundant_rng = 0; - int cutoff_Hz, hp_freq_smth1; - int voice_est; /* Probability of voice in Q7 */ - opus_int32 equiv_rate; - int delay_compensation; - int frame_rate; - opus_int32 max_rate; /* Max bitrate we're allowed to use */ - int curr_bandwidth; - opus_val16 HB_gain; - opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ - int total_buffer; - opus_val16 stereo_width; - const CELTMode *celt_mode; -#ifndef DISABLE_FLOAT_API - AnalysisInfo analysis_info; - int analysis_read_pos_bak=-1; - int analysis_read_subframe_bak=-1; -#endif - VARDECL(opus_val16, tmp_prefill); - - ALLOC_STACK; - - max_data_bytes = IMIN(1276, out_data_bytes); - - st->rangeFinal = 0; - if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && - 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) - || (400*frame_size < st->Fs) - || max_data_bytes<=0 - ) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - silk_enc = (char*)st+st->silk_enc_offset; - celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(lsb_depth, st->lsb_depth); - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); -#ifndef DISABLE_FLOAT_API - analysis_info.valid = 0; -#ifdef FIXED_POINT - if (st->silk_mode.complexity >= 10 && st->Fs==48000) -#else - if (st->silk_mode.complexity >= 7 && st->Fs==48000) -#endif - { - analysis_read_pos_bak = st->analysis.read_pos; - analysis_read_subframe_bak = st->analysis.read_subframe; - run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, - c1, c2, analysis_channels, st->Fs, - lsb_depth, downmix, &analysis_info); - } -#else - (void)analysis_pcm; - (void)analysis_size; -#endif - - st->voice_ratio = -1; - -#ifndef DISABLE_FLOAT_API - st->detected_bandwidth = 0; - if (analysis_info.valid) - { - int analysis_bandwidth; - if (st->signal_type == OPUS_AUTO) - st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); - - analysis_bandwidth = analysis_info.bandwidth; - if (analysis_bandwidth<=12) - st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; - else if (analysis_bandwidth<=14) - st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - else if (analysis_bandwidth<=16) - st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; - else if (analysis_bandwidth<=18) - st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; - else - st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; - } -#endif - - if (st->channels==2 && st->force_channels!=1) - stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem); - else - stereo_width = 0; - total_buffer = delay_compensation; - st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); - - frame_rate = st->Fs/frame_size; - if (!st->use_vbr) - { - int cbrBytes; - /* Multiply by 3 to make sure the division is exact. */ - int frame_rate3 = 3*st->Fs/frame_size; - /* We need to make sure that "int" values always fit in 16 bits. */ - cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes); - st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3; - max_data_bytes = cbrBytes; - } - if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 - || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) - { - /*If the space is too low to do something useful, emit 'PLC' frames.*/ - int tocmode = st->mode; - int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; - if (tocmode==0) - tocmode = MODE_SILK_ONLY; - if (frame_rate>100) - tocmode = MODE_CELT_ONLY; - if (frame_rate < 50) - tocmode = MODE_SILK_ONLY; - if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) - bw=OPUS_BANDWIDTH_WIDEBAND; - else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) - bw=OPUS_BANDWIDTH_NARROWBAND; - else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) - bw=OPUS_BANDWIDTH_SUPERWIDEBAND; - data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); - ret = 1; - if (!st->use_vbr) - { - ret = opus_packet_pad(data, ret, max_data_bytes); - if (ret == OPUS_OK) - ret = max_data_bytes; - } - RESTORE_STACK; - return ret; - } - max_rate = frame_rate*max_data_bytes*8; - - /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ - equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50); - - if (st->signal_type == OPUS_SIGNAL_VOICE) - voice_est = 127; - else if (st->signal_type == OPUS_SIGNAL_MUSIC) - voice_est = 0; - else if (st->voice_ratio >= 0) - { - voice_est = st->voice_ratio*327>>8; - /* For AUDIO, never be more than 90% confident of having speech */ - if (st->application == OPUS_APPLICATION_AUDIO) - voice_est = IMIN(voice_est, 115); - } else if (st->application == OPUS_APPLICATION_VOIP) - voice_est = 115; - else - voice_est = 48; - - if (st->force_channels!=OPUS_AUTO && st->channels == 2) - { - st->stream_channels = st->force_channels; - } else { -#ifdef FUZZING - /* Random mono/stereo decision */ - if (st->channels == 2 && (rand()&0x1F)==0) - st->stream_channels = 3-st->stream_channels; -#else - /* Rate-dependent mono-stereo decision */ - if (st->channels == 2) - { - opus_int32 stereo_threshold; - stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14); - if (st->stream_channels == 2) - stereo_threshold -= 1000; - else - stereo_threshold += 1000; - st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1; - } else { - st->stream_channels = st->channels; - } -#endif - } - equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50); - - /* Mode selection depending on application and signal type */ - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - { - st->mode = MODE_CELT_ONLY; - } else if (st->user_forced_mode == OPUS_AUTO) - { -#ifdef FUZZING - /* Random mode switching */ - if ((rand()&0xF)==0) - { - if ((rand()&0x1)==0) - st->mode = MODE_CELT_ONLY; - else - st->mode = MODE_SILK_ONLY; - } else { - if (st->prev_mode==MODE_CELT_ONLY) - st->mode = MODE_CELT_ONLY; - else - st->mode = MODE_SILK_ONLY; - } -#else - opus_int32 mode_voice, mode_music; - opus_int32 threshold; - - /* Interpolate based on stereo width */ - mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0]) - + MULT16_32_Q15(stereo_width,mode_thresholds[1][0])); - mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) - + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); - /* Interpolate based on speech/music probability */ - threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); - /* Bias towards SILK for VoIP because of some useful features */ - if (st->application == OPUS_APPLICATION_VOIP) - threshold += 8000; - - /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/ - /* Hysteresis */ - if (st->prev_mode == MODE_CELT_ONLY) - threshold -= 4000; - else if (st->prev_mode>0) - threshold += 4000; - - st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY; - - /* When FEC is enabled and there's enough packet loss, use SILK */ - if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) - st->mode = MODE_SILK_ONLY; - /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */ - if (st->silk_mode.useDTX && voice_est > 100) - st->mode = MODE_SILK_ONLY; -#endif - } else { - st->mode = st->user_forced_mode; - } - - /* Override the chosen mode to make sure we meet the requested frame size */ - if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100) - st->mode = MODE_CELT_ONLY; - if (st->lfe) - st->mode = MODE_CELT_ONLY; - /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */ - if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8)) - st->mode = MODE_CELT_ONLY; - - if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 - && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) - { - /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ - st->silk_mode.toMono = 1; - st->stream_channels = 2; - } else { - st->silk_mode.toMono = 0; - } - - if (st->prev_mode > 0 && - ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || - (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) - { - redundancy = 1; - celt_to_silk = (st->mode != MODE_CELT_ONLY); - if (!celt_to_silk) - { - /* Switch to SILK/hybrid if frame size is 10 ms or more*/ - if (frame_size >= st->Fs/100) - { - st->mode = st->prev_mode; - to_celt = 1; - } else { - redundancy=0; - } - } - } - /* For the first frame at a new SILK bandwidth */ - if (st->silk_bw_switch) - { - redundancy = 1; - celt_to_silk = 1; - st->silk_bw_switch = 0; - prefill=1; - } - - if (redundancy) - { - /* Fair share of the max size allowed */ - redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200)); - /* For VBR, target the actual bitrate (subject to the limit above) */ - if (st->use_vbr) - redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600); - } - - if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) - { - silk_EncControlStruct dummy; - silk_InitEncoder( silk_enc, st->arch, &dummy); - prefill=1; - } - - /* Automatic (rate-dependent) bandwidth selection */ - if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) - { - const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; - opus_int32 bandwidth_thresholds[8]; - int bandwidth = OPUS_BANDWIDTH_FULLBAND; - opus_int32 equiv_rate2; - - equiv_rate2 = equiv_rate; - if (st->mode != MODE_CELT_ONLY) - { - /* Adjust the threshold +/- 10% depending on complexity */ - equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50; - /* CBR is less efficient by ~1 kb/s */ - if (!st->use_vbr) - equiv_rate2 -= 1000; - } - if (st->channels==2 && st->force_channels!=1) - { - voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; - music_bandwidth_thresholds = stereo_music_bandwidth_thresholds; - } else { - voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds; - music_bandwidth_thresholds = mono_music_bandwidth_thresholds; - } - /* Interpolate bandwidth thresholds depending on voice estimation */ - for (i=0;i<8;i++) - { - bandwidth_thresholds[i] = music_bandwidth_thresholds[i] - + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14); - } - do { - int threshold, hysteresis; - threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; - hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; - if (!st->first) - { - if (st->bandwidth >= bandwidth) - threshold -= hysteresis; - else - threshold += hysteresis; - } - if (equiv_rate2 >= threshold) - break; - } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); - st->bandwidth = bandwidth; - /* Prevents any transition to SWB/FB until the SILK layer has fully - switched to WB mode and turned the variable LP filter off */ - if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; - } - - if (st->bandwidth>st->max_bandwidth) - st->bandwidth = st->max_bandwidth; - - if (st->user_bandwidth != OPUS_AUTO) - st->bandwidth = st->user_bandwidth; - - /* This prevents us from using hybrid at unsafe CBR/max rates */ - if (st->mode != MODE_CELT_ONLY && max_rate < 15000) - { - st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND); - } - - /* Prevents Opus from wasting bits on frequencies that are above - the Nyquist rate of the input signal */ - if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND) - st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; - if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; - if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND) - st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND) - st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; -#ifndef DISABLE_FLOAT_API - /* Use detected bandwidth to reduce the encoded bandwidth. */ - if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO) - { - int min_detected_bandwidth; - /* Makes bandwidth detection more conservative just in case the detector - gets it wrong when we could have coded a high bandwidth transparently. - When operating in SILK/hybrid mode, we don't go below wideband to avoid - more complicated switches that require redundancy. */ - if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY) - min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; - else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY) - min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - else if (equiv_rate <= 30000*st->stream_channels) - min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; - else if (equiv_rate <= 44000*st->stream_channels) - min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; - else - min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; - - st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth); - st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); - } -#endif - celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); - - /* CELT mode doesn't support mediumband, use wideband instead */ - if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; - if (st->lfe) - st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; - - /* Can't support higher than wideband for >20 ms frames */ - if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) - { - VARDECL(unsigned char, tmp_data); - int nb_frames; - int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; - VARDECL(OpusRepacketizer, rp); - opus_int32 bytes_per_frame; - opus_int32 repacketize_len; - -#ifndef DISABLE_FLOAT_API - if (analysis_read_pos_bak!= -1) - { - st->analysis.read_pos = analysis_read_pos_bak; - st->analysis.read_subframe = analysis_read_subframe_bak; - } -#endif - - nb_frames = frame_size > st->Fs/25 ? 3 : 2; - bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); - - ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); - - ALLOC(rp, 1, OpusRepacketizer); - opus_repacketizer_init(rp); - - bak_mode = st->user_forced_mode; - bak_bandwidth = st->user_bandwidth; - bak_channels = st->force_channels; - - st->user_forced_mode = st->mode; - st->user_bandwidth = st->bandwidth; - st->force_channels = st->stream_channels; - bak_to_mono = st->silk_mode.toMono; - - if (bak_to_mono) - st->force_channels = 1; - else - st->prev_channels = st->stream_channels; - for (i=0;isilk_mode.toMono = 0; - /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ - if (to_celt && i==nb_frames-1) - st->user_forced_mode = MODE_CELT_ONLY; - tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, - tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, - NULL, 0, c1, c2, analysis_channels, downmix, float_api); - if (tmp_len<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); - if (ret<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - if (st->use_vbr) - repacketize_len = out_data_bytes; - else - repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes); - ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); - if (ret<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - st->user_forced_mode = bak_mode; - st->user_bandwidth = bak_bandwidth; - st->force_channels = bak_channels; - st->silk_mode.toMono = bak_to_mono; - RESTORE_STACK; - return ret; - } - curr_bandwidth = st->bandwidth; - - /* Chooses the appropriate mode for speech - *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ - if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_HYBRID; - if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_SILK_ONLY; - - /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ - bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; - - data += 1; - - ec_enc_init(&enc, data, max_data_bytes-1); - - ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); - OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels); - - if (st->mode == MODE_CELT_ONLY) - hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); - else - hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15; - - st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15, - hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); - - /* convert from log scale to Hertz */ - cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) ); - - if (st->application == OPUS_APPLICATION_VOIP) - { - hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); - } else { - dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); - } -#ifndef FIXED_POINT - if (float_api) - { - opus_val32 sum; - sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch); - /* This should filter out both NaNs and ridiculous signals that could - cause NaNs further down. */ - if (!(sum < 1e9f) || celt_isnan(sum)) - { - OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels); - st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0; - } - } -#endif - - - /* SILK processing */ - HB_gain = Q15ONE; - if (st->mode != MODE_CELT_ONLY) - { - opus_int32 total_bitRate, celt_rate; -#ifdef FIXED_POINT - const opus_int16 *pcm_silk; -#else - VARDECL(opus_int16, pcm_silk); - ALLOC(pcm_silk, st->channels*frame_size, opus_int16); -#endif - - /* Distribute bits between SILK and CELT */ - total_bitRate = 8 * bytes_target * frame_rate; - if( st->mode == MODE_HYBRID ) { - int HB_gain_ref; - /* Base rate for SILK */ - st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) ); - if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { - /* SILK gets 2/3 of the remaining bits */ - st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3; - } else { /* FULLBAND */ - /* SILK gets 3/5 of the remaining bits */ - st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5; - } - /* Don't let SILK use more than 80% */ - if( st->silk_mode.bitRate > total_bitRate * 4/5 ) { - st->silk_mode.bitRate = total_bitRate * 4/5; - } - if (!st->energy_masking) - { - /* Increasingly attenuate high band when it gets allocated fewer bits */ - celt_rate = total_bitRate - st->silk_mode.bitRate; - HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600; - HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6); - HB_gain = HB_gain < (opus_val32)Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE; - } - } else { - /* SILK gets all bits */ - st->silk_mode.bitRate = total_bitRate; - } - - /* Surround masking for SILK */ - if (st->energy_masking && st->use_vbr && !st->lfe) - { - opus_val32 mask_sum=0; - opus_val16 masking_depth; - opus_int32 rate_offset; - int c; - int end = 17; - opus_int16 srate = 16000; - if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) - { - end = 13; - srate = 8000; - } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - { - end = 15; - srate = 12000; - } - for (c=0;cchannels;c++) - { - for(i=0;ienergy_masking[21*c+i], - QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); - if (mask > 0) - mask = HALF16(mask); - mask_sum += mask; - } - } - /* Conservative rate reduction, we cut the masking in half */ - masking_depth = mask_sum / end*st->channels; - masking_depth += QCONST16(.2f, DB_SHIFT); - rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); - rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); - /* Split the rate change between the SILK and CELT part for hybrid. */ - if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND) - st->silk_mode.bitRate += 3*rate_offset/5; - else - st->silk_mode.bitRate += rate_offset; - bytes_target += rate_offset * frame_size / (8 * st->Fs); - } - - st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; - st->silk_mode.nChannelsAPI = st->channels; - st->silk_mode.nChannelsInternal = st->stream_channels; - if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { - st->silk_mode.desiredInternalSampleRate = 8000; - } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { - st->silk_mode.desiredInternalSampleRate = 12000; - } else { - silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); - st->silk_mode.desiredInternalSampleRate = 16000; - } - if( st->mode == MODE_HYBRID ) { - /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ - st->silk_mode.minInternalSampleRate = 16000; - } else { - st->silk_mode.minInternalSampleRate = 8000; - } - - if (st->mode == MODE_SILK_ONLY) - { - opus_int32 effective_max_rate = max_rate; - st->silk_mode.maxInternalSampleRate = 16000; - if (frame_rate > 50) - effective_max_rate = effective_max_rate*2/3; - if (effective_max_rate < 13000) - { - st->silk_mode.maxInternalSampleRate = 12000; - st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); - } - if (effective_max_rate < 9600) - { - st->silk_mode.maxInternalSampleRate = 8000; - st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); - } - } else { - st->silk_mode.maxInternalSampleRate = 16000; - } - - st->silk_mode.useCBR = !st->use_vbr; - - /* Call SILK encoder for the low band */ - nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes); - - st->silk_mode.maxBits = nBytes*8; - /* Only allow up to 90% of the bits for hybrid mode*/ - if (st->mode == MODE_HYBRID) - st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10; - if (st->silk_mode.useCBR) - { - st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8; - /* Reduce the initial target to make it easier to reach the CBR rate */ - st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000); - } - - if (prefill) - { - opus_int32 zero=0; - int prefill_offset; - /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode - a discontinuity. The exact location is what we need to avoid leaving any "gap" - in the audio when mixing with the redundant CELT frame. Here we can afford to - overwrite st->delay_buffer because the only thing that uses it before it gets - rewritten is tmp_prefill[] and even then only the part after the ramp really - gets used (rather than sent to the encoder and discarded) */ - prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); - gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, - 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); - OPUS_CLEAR(st->delay_buffer, prefill_offset); -#ifdef FIXED_POINT - pcm_silk = st->delay_buffer; -#else - for (i=0;iencoder_buffer*st->channels;i++) - pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); -#endif - silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); - } - -#ifdef FIXED_POINT - pcm_silk = pcm_buf+total_buffer*st->channels; -#else - for (i=0;ichannels;i++) - pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); -#endif - ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); - if( ret ) { - /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ - /* Handle error */ - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - if (nBytes==0) - { - st->rangeFinal = 0; - data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); - RESTORE_STACK; - return 1; - } - /* Extract SILK internal bandwidth for signaling in first byte */ - if( st->mode == MODE_SILK_ONLY ) { - if( st->silk_mode.internalSampleRate == 8000 ) { - curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; - } else if( st->silk_mode.internalSampleRate == 12000 ) { - curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - } else if( st->silk_mode.internalSampleRate == 16000 ) { - curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; - } - } else { - silk_assert( st->silk_mode.internalSampleRate == 16000 ); - } - - st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; - /* FIXME: How do we allocate the redundancy for CBR? */ - if (st->silk_mode.opusCanSwitch) - { - redundancy = 1; - celt_to_silk = 0; - st->silk_bw_switch = 1; - } - } - - /* CELT processing */ - { - int endband=21; - - switch(curr_bandwidth) - { - case OPUS_BANDWIDTH_NARROWBAND: - endband = 13; - break; - case OPUS_BANDWIDTH_MEDIUMBAND: - case OPUS_BANDWIDTH_WIDEBAND: - endband = 17; - break; - case OPUS_BANDWIDTH_SUPERWIDEBAND: - endband = 19; - break; - case OPUS_BANDWIDTH_FULLBAND: - endband = 21; - break; - } - celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); - celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); - } - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); - if (st->mode != MODE_SILK_ONLY) - { - opus_val32 celt_pred=2; - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - /* We may still decide to disable prediction later */ - if (st->silk_mode.reducedDependency) - celt_pred = 0; - celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred)); - - if (st->mode == MODE_HYBRID) - { - int len; - - len = (ec_tell(&enc)+7)>>3; - if (redundancy) - len += st->mode == MODE_HYBRID ? 3 : 1; - if( st->use_vbr ) { - nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); - } else { - /* check if SILK used up too much */ - nb_compr_bytes = len > bytes_target ? len : bytes_target; - } - } else { - if (st->use_vbr) - { - opus_int32 bonus=0; -#ifndef DISABLE_FLOAT_API - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) - { - bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); - if (analysis_info.valid) - bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); - } -#endif - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); - nb_compr_bytes = max_data_bytes-1-redundancy_bytes; - } else { - nb_compr_bytes = bytes_target; - } - } - - } else { - nb_compr_bytes = 0; - } - - ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); - if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) - { - OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400); - } - - if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0) - { - OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer)); - OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)], - &pcm_buf[0], - (frame_size+total_buffer)*st->channels); - } else { - OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels); - } - /* gain_fade() and stereo_fade() need to be after the buffer copying - because we don't want any of this to affect the SILK part */ - if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { - gain_fade(pcm_buf, pcm_buf, - st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); - } - st->prev_HB_gain = HB_gain; - if (st->mode != MODE_HYBRID || st->stream_channels==1) - st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000)); - if( !st->energy_masking && st->channels == 2 ) { - /* Apply stereo width reduction (at low bitrates) */ - if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { - opus_val16 g1, g2; - g1 = st->hybrid_stereo_width_Q14; - g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); -#ifdef FIXED_POINT - g1 = g1==16384 ? Q15ONE : SHL16(g1,1); - g2 = g2==16384 ? Q15ONE : SHL16(g2,1); -#else - g1 *= (1.f/16384); - g2 *= (1.f/16384); -#endif - stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, - frame_size, st->channels, celt_mode->window, st->Fs); - st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; - } - } - - if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) - { - /* For SILK mode, the redundancy is inferred from the length */ - if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes)) - ec_enc_bit_logp(&enc, redundancy, 12); - if (redundancy) - { - int max_redundancy; - ec_enc_bit_logp(&enc, celt_to_silk, 1); - if (st->mode == MODE_HYBRID) - max_redundancy = (max_data_bytes-1)-nb_compr_bytes; - else - max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); - /* Target the same bit-rate for redundancy as for the rest, - up to a max of 257 bytes */ - redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600); - redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); - if (st->mode == MODE_HYBRID) - ec_enc_uint(&enc, redundancy_bytes-2, 256); - } - } else { - redundancy = 0; - } - - if (!redundancy) - { - st->silk_bw_switch = 0; - redundancy_bytes = 0; - } - if (st->mode != MODE_CELT_ONLY)start_band=17; - - if (st->mode == MODE_SILK_ONLY) - { - ret = (ec_tell(&enc)+7)>>3; - ec_enc_done(&enc); - nb_compr_bytes = ret; - } else { - nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes); - ec_enc_shrink(&enc, nb_compr_bytes); - } - -#ifndef DISABLE_FLOAT_API - if (redundancy || st->mode != MODE_SILK_ONLY) - celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); -#endif - - /* 5 ms redundant frame for CELT->SILK */ - if (redundancy && celt_to_silk) - { - int err; - celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); - if (err < 0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - } - - celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band)); - - if (st->mode != MODE_SILK_ONLY) - { - if (st->mode != st->prev_mode && st->prev_mode > 0) - { - unsigned char dummy[2]; - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - - /* Prefilling */ - celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); - celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); - } - /* If false, we already busted the budget and we'll end up with a "PLC packet" */ - if (ec_tell(&enc) <= 8*nb_compr_bytes) - { - ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); - if (ret < 0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - } - - /* 5 ms redundant frame for SILK->CELT */ - if (redundancy && !celt_to_silk) - { - int err; - unsigned char dummy[2]; - int N2, N4; - N2 = st->Fs/200; - N4 = st->Fs/400; - - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); - celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); - - /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ - celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); - - err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); - if (err < 0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); - } - - - - /* Signalling the mode in the first byte */ - data--; - data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); - - st->rangeFinal = enc.rng ^ redundant_rng; - - if (to_celt) - st->prev_mode = MODE_CELT_ONLY; - else - st->prev_mode = st->mode; - st->prev_channels = st->stream_channels; - st->prev_framesize = frame_size; - - st->first = 0; - - /* In the unlikely case that the SILK encoder busted its target, tell - the decoder to call the PLC */ - if (ec_tell(&enc) > (max_data_bytes-1)*8) - { - if (max_data_bytes < 2) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - data[1] = 0; - ret = 1; - st->rangeFinal = 0; - } else if (st->mode==MODE_SILK_ONLY&&!redundancy) - { - /*When in LPC only mode it's perfectly - reasonable to strip off trailing zero bytes as - the required range decoder behavior is to - fill these in. This can't be done when the MDCT - modes are used because the decoder needs to know - the actual length for allocation purposes.*/ - while(ret>2&&data[ret]==0)ret--; - } - /* Count ToC and redundancy */ - ret += 1+redundancy_bytes; - if (!st->use_vbr) - { - if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK) - - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - ret = max_data_bytes; - } - RESTORE_STACK; - return ret; -} - -#ifdef FIXED_POINT - -#ifndef DISABLE_FLOAT_API -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 max_data_bytes) -{ - int i, ret; - int frame_size; - int delay_compensation; - VARDECL(opus_int16, in); - ALLOC_STACK; - - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_float, st->analysis.subframe_mem); - - ALLOC(in, frame_size*st->channels, opus_int16); - - for (i=0;ichannels;i++) - in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); - RESTORE_STACK; - return ret; -} -#endif - -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 out_data_bytes) -{ - int frame_size; - int delay_compensation; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_int -#ifndef DISABLE_FLOAT_API - , st->analysis.subframe_mem -#endif - ); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); -} - -#else -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 max_data_bytes) -{ - int i, ret; - int frame_size; - int delay_compensation; - VARDECL(float, in); - ALLOC_STACK; - - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_int, st->analysis.subframe_mem); - - ALLOC(in, frame_size*st->channels, float); - - for (i=0;ichannels;i++) - in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); - RESTORE_STACK; - return ret; -} -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 out_data_bytes) -{ - int frame_size; - int delay_compensation; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_float, st->analysis.subframe_mem); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); -} -#endif - - -int opus_encoder_ctl(OpusEncoder *st, int request, ...) -{ - int ret; - CELTEncoder *celt_enc; - va_list ap; - - ret = OPUS_OK; - va_start(ap, request); - - celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); - - switch (request) - { - case OPUS_SET_APPLICATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO - && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY) - || (!st->first && st->application != value)) - { - ret = OPUS_BAD_ARG; - break; - } - st->application = value; - } - break; - case OPUS_GET_APPLICATION_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->application; - } - break; - case OPUS_SET_BITRATE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX) - { - if (value <= 0) - goto bad_arg; - else if (value <= 500) - value = 500; - else if (value > (opus_int32)300000*st->channels) - value = (opus_int32)300000*st->channels; - } - st->user_bitrate_bps = value; - } - break; - case OPUS_GET_BITRATE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276); - } - break; - case OPUS_SET_FORCE_CHANNELS_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if((value<1 || value>st->channels) && value != OPUS_AUTO) - { - goto bad_arg; - } - st->force_channels = value; - } - break; - case OPUS_GET_FORCE_CHANNELS_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->force_channels; - } - break; - case OPUS_SET_MAX_BANDWIDTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) - { - goto bad_arg; - } - st->max_bandwidth = value; - if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { - st->silk_mode.maxInternalSampleRate = 8000; - } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { - st->silk_mode.maxInternalSampleRate = 12000; - } else { - st->silk_mode.maxInternalSampleRate = 16000; - } - } - break; - case OPUS_GET_MAX_BANDWIDTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->max_bandwidth; - } - break; - case OPUS_SET_BANDWIDTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO) - { - goto bad_arg; - } - st->user_bandwidth = value; - if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { - st->silk_mode.maxInternalSampleRate = 8000; - } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { - st->silk_mode.maxInternalSampleRate = 12000; - } else { - st->silk_mode.maxInternalSampleRate = 16000; - } - } - break; - case OPUS_GET_BANDWIDTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->bandwidth; - } - break; - case OPUS_SET_DTX_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->silk_mode.useDTX = value; - } - break; - case OPUS_GET_DTX_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.useDTX; - } - break; - case OPUS_SET_COMPLEXITY_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>10) - { - goto bad_arg; - } - st->silk_mode.complexity = value; - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value)); - } - break; - case OPUS_GET_COMPLEXITY_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.complexity; - } - break; - case OPUS_SET_INBAND_FEC_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->silk_mode.useInBandFEC = value; - } - break; - case OPUS_GET_INBAND_FEC_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.useInBandFEC; - } - break; - case OPUS_SET_PACKET_LOSS_PERC_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value < 0 || value > 100) - { - goto bad_arg; - } - st->silk_mode.packetLossPercentage = value; - celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value)); - } - break; - case OPUS_GET_PACKET_LOSS_PERC_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.packetLossPercentage; - } - break; - case OPUS_SET_VBR_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->use_vbr = value; - st->silk_mode.useCBR = 1-value; - } - break; - case OPUS_GET_VBR_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->use_vbr; - } - break; - case OPUS_SET_VOICE_RATIO_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<-1 || value>100) - { - goto bad_arg; - } - st->voice_ratio = value; - } - break; - case OPUS_GET_VOICE_RATIO_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->voice_ratio; - } - break; - case OPUS_SET_VBR_CONSTRAINT_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->vbr_constraint = value; - } - break; - case OPUS_GET_VBR_CONSTRAINT_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->vbr_constraint; - } - break; - case OPUS_SET_SIGNAL_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC) - { - goto bad_arg; - } - st->signal_type = value; - } - break; - case OPUS_GET_SIGNAL_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->signal_type; - } - break; - case OPUS_GET_LOOKAHEAD_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->Fs/400; - if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY) - *value += st->delay_compensation; - } - break; - case OPUS_GET_SAMPLE_RATE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->Fs; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 *value = va_arg(ap, opus_uint32*); - if (!value) - { - goto bad_arg; - } - *value = st->rangeFinal; - } - break; - case OPUS_SET_LSB_DEPTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<8 || value>24) - { - goto bad_arg; - } - st->lsb_depth=value; - } - break; - case OPUS_GET_LSB_DEPTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->lsb_depth; - } - break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && - value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && - value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && - value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE) - { - goto bad_arg; - } - st->variable_duration = value; - celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); - } - break; - case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->variable_duration; - } - break; - case OPUS_SET_PREDICTION_DISABLED_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value > 1 || value < 0) - goto bad_arg; - st->silk_mode.reducedDependency = value; - } - break; - case OPUS_GET_PREDICTION_DISABLED_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - goto bad_arg; - *value = st->silk_mode.reducedDependency; - } - break; - case OPUS_RESET_STATE: - { - void *silk_enc; - silk_EncControlStruct dummy; - char *start; - silk_enc = (char*)st+st->silk_enc_offset; -#ifndef DISABLE_FLOAT_API - tonality_analysis_reset(&st->analysis); -#endif - - start = (char*)&st->OPUS_ENCODER_RESET_START; - OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st)); - - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - silk_InitEncoder( silk_enc, st->arch, &dummy ); - st->stream_channels = st->channels; - st->hybrid_stereo_width_Q14 = 1 << 14; - st->prev_HB_gain = Q15ONE; - st->first = 1; - st->mode = MODE_HYBRID; - st->bandwidth = OPUS_BANDWIDTH_FULLBAND; - st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); - } - break; - case OPUS_SET_FORCE_MODE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO) - { - goto bad_arg; - } - st->user_forced_mode = value; - } - break; - case OPUS_SET_LFE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->lfe = value; - ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); - } - break; - case OPUS_SET_ENERGY_MASK_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_masking = value; - ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); - } - break; - - case CELT_GET_MODE_REQUEST: - { - const CELTMode ** value = va_arg(ap, const CELTMode**); - if (!value) - { - goto bad_arg; - } - ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value)); - } - break; - default: - /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/ - ret = OPUS_UNIMPLEMENTED; - break; - } - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - -void opus_encoder_destroy(OpusEncoder *st) -{ - opus_free(st); -} diff --git a/thirdparty/opus/opus_multistream.c b/thirdparty/opus/opus_multistream.c deleted file mode 100644 index 09c3639b7f..0000000000 --- a/thirdparty/opus/opus_multistream.c +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_multistream.h" -#include "opus.h" -#include "opus_private.h" -#include "stack_alloc.h" -#include -#include "float_cast.h" -#include "os_support.h" - - -int validate_layout(const ChannelLayout *layout) -{ - int i, max_channel; - - max_channel = layout->nb_streams+layout->nb_coupled_streams; - if (max_channel>255) - return 0; - for (i=0;inb_channels;i++) - { - if (layout->mapping[i] >= max_channel && layout->mapping[i] != 255) - return 0; - } - return 1; -} - - -int get_left_channel(const ChannelLayout *layout, int stream_id, int prev) -{ - int i; - i = (prev<0) ? 0 : prev+1; - for (;inb_channels;i++) - { - if (layout->mapping[i]==stream_id*2) - return i; - } - return -1; -} - -int get_right_channel(const ChannelLayout *layout, int stream_id, int prev) -{ - int i; - i = (prev<0) ? 0 : prev+1; - for (;inb_channels;i++) - { - if (layout->mapping[i]==stream_id*2+1) - return i; - } - return -1; -} - -int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev) -{ - int i; - i = (prev<0) ? 0 : prev+1; - for (;inb_channels;i++) - { - if (layout->mapping[i]==stream_id+layout->nb_coupled_streams) - return i; - } - return -1; -} - diff --git a/thirdparty/opus/opus_multistream_decoder.c b/thirdparty/opus/opus_multistream_decoder.c deleted file mode 100644 index b95eaa6eac..0000000000 --- a/thirdparty/opus/opus_multistream_decoder.c +++ /dev/null @@ -1,537 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_multistream.h" -#include "opus.h" -#include "opus_private.h" -#include "stack_alloc.h" -#include -#include "float_cast.h" -#include "os_support.h" - -struct OpusMSDecoder { - ChannelLayout layout; - /* Decoder states go here */ -}; - - - - -/* DECODER */ - -opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams) -{ - int coupled_size; - int mono_size; - - if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0; - coupled_size = opus_decoder_get_size(2); - mono_size = opus_decoder_get_size(1); - return align(sizeof(OpusMSDecoder)) - + nb_coupled_streams * align(coupled_size) - + (nb_streams-nb_coupled_streams) * align(mono_size); -} - -int opus_multistream_decoder_init( - OpusMSDecoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping -) -{ - int coupled_size; - int mono_size; - int i, ret; - char *ptr; - - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - return OPUS_BAD_ARG; - - st->layout.nb_channels = channels; - st->layout.nb_streams = streams; - st->layout.nb_coupled_streams = coupled_streams; - - for (i=0;ilayout.nb_channels;i++) - st->layout.mapping[i] = mapping[i]; - if (!validate_layout(&st->layout)) - return OPUS_BAD_ARG; - - ptr = (char*)st + align(sizeof(OpusMSDecoder)); - coupled_size = opus_decoder_get_size(2); - mono_size = opus_decoder_get_size(1); - - for (i=0;ilayout.nb_coupled_streams;i++) - { - ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 2); - if(ret!=OPUS_OK)return ret; - ptr += align(coupled_size); - } - for (;ilayout.nb_streams;i++) - { - ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 1); - if(ret!=OPUS_OK)return ret; - ptr += align(mono_size); - } - return OPUS_OK; -} - - -OpusMSDecoder *opus_multistream_decoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int *error -) -{ - int ret; - OpusMSDecoder *st; - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusMSDecoder *)opus_alloc(opus_multistream_decoder_get_size(streams, coupled_streams)); - if (st==NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_multistream_decoder_init(st, Fs, channels, streams, coupled_streams, mapping); - if (error) - *error = ret; - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - return st; -} - -typedef void (*opus_copy_channel_out_func)( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size -); - -static int opus_multistream_packet_validate(const unsigned char *data, - opus_int32 len, int nb_streams, opus_int32 Fs) -{ - int s; - int count; - unsigned char toc; - opus_int16 size[48]; - int samples=0; - opus_int32 packet_offset; - - for (s=0;slayout.nb_streams-1) - { - RESTORE_STACK; - return OPUS_INVALID_PACKET; - } - if (!do_plc) - { - int ret = opus_multistream_packet_validate(data, len, st->layout.nb_streams, Fs); - if (ret < 0) - { - RESTORE_STACK; - return ret; - } else if (ret > frame_size) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - } - for (s=0;slayout.nb_streams;s++) - { - OpusDecoder *dec; - int packet_offset, ret; - - dec = (OpusDecoder*)ptr; - ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size); - - if (!do_plc && len<=0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - packet_offset = 0; - ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip); - data += packet_offset; - len -= packet_offset; - if (ret <= 0) - { - RESTORE_STACK; - return ret; - } - frame_size = ret; - if (s < st->layout.nb_coupled_streams) - { - int chan, prev; - prev = -1; - /* Copy "left" audio to the channel(s) where it belongs */ - while ( (chan = get_left_channel(&st->layout, s, prev)) != -1) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf, 2, frame_size); - prev = chan; - } - prev = -1; - /* Copy "right" audio to the channel(s) where it belongs */ - while ( (chan = get_right_channel(&st->layout, s, prev)) != -1) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf+1, 2, frame_size); - prev = chan; - } - } else { - int chan, prev; - prev = -1; - /* Copy audio to the channel(s) where it belongs */ - while ( (chan = get_mono_channel(&st->layout, s, prev)) != -1) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf, 1, frame_size); - prev = chan; - } - } - } - /* Handle muted channels */ - for (c=0;clayout.nb_channels;c++) - { - if (st->layout.mapping[c] == 255) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, c, - NULL, 0, frame_size); - } - } - RESTORE_STACK; - return frame_size; -} - -#if !defined(DISABLE_FLOAT_API) -static void opus_copy_channel_out_float( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size -) -{ - float *float_dst; - opus_int32 i; - float_dst = (float*)dst; - if (src != NULL) - { - for (i=0;ilayout.nb_streams;s++) - { - OpusDecoder *dec; - dec = (OpusDecoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_decoder_ctl(dec, request, &tmp); - if (ret != OPUS_OK) break; - *value ^= tmp; - } - } - break; - case OPUS_RESET_STATE: - { - int s; - for (s=0;slayout.nb_streams;s++) - { - OpusDecoder *dec; - - dec = (OpusDecoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_decoder_ctl(dec, OPUS_RESET_STATE); - if (ret != OPUS_OK) - break; - } - } - break; - case OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST: - { - int s; - opus_int32 stream_id; - OpusDecoder **value; - stream_id = va_arg(ap, opus_int32); - if (stream_id<0 || stream_id >= st->layout.nb_streams) - ret = OPUS_BAD_ARG; - value = va_arg(ap, OpusDecoder**); - if (!value) - { - goto bad_arg; - } - for (s=0;slayout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - *value = (OpusDecoder*)ptr; - } - break; - case OPUS_SET_GAIN_REQUEST: - { - int s; - /* This works for int32 params */ - opus_int32 value = va_arg(ap, opus_int32); - for (s=0;slayout.nb_streams;s++) - { - OpusDecoder *dec; - - dec = (OpusDecoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_decoder_ctl(dec, request, value); - if (ret != OPUS_OK) - break; - } - } - break; - default: - ret = OPUS_UNIMPLEMENTED; - break; - } - - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - - -void opus_multistream_decoder_destroy(OpusMSDecoder *st) -{ - opus_free(st); -} diff --git a/thirdparty/opus/opus_multistream_encoder.c b/thirdparty/opus/opus_multistream_encoder.c deleted file mode 100644 index 1698223a16..0000000000 --- a/thirdparty/opus/opus_multistream_encoder.c +++ /dev/null @@ -1,1351 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_multistream.h" -#include "opus.h" -#include "opus_private.h" -#include "stack_alloc.h" -#include -#include "float_cast.h" -#include "os_support.h" -#include "mathops.h" -#include "mdct.h" -#include "modes.h" -#include "bands.h" -#include "quant_bands.h" -#include "pitch.h" - -typedef struct { - int nb_streams; - int nb_coupled_streams; - unsigned char mapping[8]; -} VorbisLayout; - -/* Index is nb_channel-1*/ -static const VorbisLayout vorbis_mappings[8] = { - {1, 0, {0}}, /* 1: mono */ - {1, 1, {0, 1}}, /* 2: stereo */ - {2, 1, {0, 2, 1}}, /* 3: 1-d surround */ - {2, 2, {0, 1, 2, 3}}, /* 4: quadraphonic surround */ - {3, 2, {0, 4, 1, 2, 3}}, /* 5: 5-channel surround */ - {4, 2, {0, 4, 1, 2, 3, 5}}, /* 6: 5.1 surround */ - {4, 3, {0, 4, 1, 2, 3, 5, 6}}, /* 7: 6.1 surround */ - {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ -}; - -typedef void (*opus_copy_channel_in_func)( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -); - -typedef enum { - MAPPING_TYPE_NONE, - MAPPING_TYPE_SURROUND -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - , /* Do not include comma at end of enumerator list */ - MAPPING_TYPE_AMBISONICS -#endif -} MappingType; - -struct OpusMSEncoder { - ChannelLayout layout; - int arch; - int lfe_stream; - int application; - int variable_duration; - MappingType mapping_type; - opus_int32 bitrate_bps; - float subframe_mem[3]; - /* Encoder states go here */ - /* then opus_val32 window_mem[channels*120]; */ - /* then opus_val32 preemph_mem[channels]; */ -}; - -static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) -{ - int s; - char *ptr; - int coupled_size, mono_size; - - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;slayout.nb_streams;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - /* void* cast avoids clang -Wcast-align warning */ - return (opus_val32*)(void*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); -} - -static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) -{ - int s; - char *ptr; - int coupled_size, mono_size; - - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;slayout.nb_streams;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - /* void* cast avoids clang -Wcast-align warning */ - return (opus_val32*)(void*)ptr; -} - -static int validate_encoder_layout(const ChannelLayout *layout) -{ - int s; - for (s=0;snb_streams;s++) - { - if (s < layout->nb_coupled_streams) - { - if (get_left_channel(layout, s, -1)==-1) - return 0; - if (get_right_channel(layout, s, -1)==-1) - return 0; - } else { - if (get_mono_channel(layout, s, -1)==-1) - return 0; - } - } - return 1; -} - -static void channel_pos(int channels, int pos[8]) -{ - /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ - if (channels==4) - { - pos[0]=1; - pos[1]=3; - pos[2]=1; - pos[3]=3; - } else if (channels==3||channels==5||channels==6) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=0; - } else if (channels==7) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=2; - pos[6]=0; - } else if (channels==8) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=1; - pos[6]=3; - pos[7]=0; - } -} - -#if 1 -/* Computes a rough approximation of log2(2^a + 2^b) */ -static opus_val16 logSum(opus_val16 a, opus_val16 b) -{ - opus_val16 max; - opus_val32 diff; - opus_val16 frac; - static const opus_val16 diff_table[17] = { - QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), - QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), - QCONST16(0.0028123f, DB_SHIFT) - }; - int low; - if (a>b) - { - max = a; - diff = SUB32(EXTEND32(a),EXTEND32(b)); - } else { - max = b; - diff = SUB32(EXTEND32(b),EXTEND32(a)); - } - if (!(diff < QCONST16(8.f, DB_SHIFT))) /* inverted to catch NaNs */ - return max; -#ifdef FIXED_POINT - low = SHR32(diff, DB_SHIFT-1); - frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); -#else - low = (int)floor(2*diff); - frac = 2*diff - low; -#endif - return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); -} -#else -opus_val16 logSum(opus_val16 a, opus_val16 b) -{ - return log2(pow(4, a)+ pow(4, b))/2; -} -#endif - -void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, - int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch -) -{ - int c; - int i; - int LM; - int pos[8] = {0}; - int upsample; - int frame_size; - opus_val16 channel_offset; - opus_val32 bandE[21]; - opus_val16 maskLogE[3][21]; - VARDECL(opus_val32, in); - VARDECL(opus_val16, x); - VARDECL(opus_val32, freq); - SAVE_STACK; - - upsample = resampling_factor(rate); - frame_size = len*upsample; - - /* LM = log2(frame_size / 120) */ - for (LM=0;LMmaxLM;LM++) - if (celt_mode->shortMdctSize<preemph, preemph_mem+c, 0); -#ifndef FIXED_POINT - { - opus_val32 sum; - sum = celt_inner_prod(in, in, frame_size+overlap, 0); - /* This should filter out both NaNs and ridiculous signals that could - cause NaNs further down. */ - if (!(sum < 1e18f) || celt_isnan(sum)) - { - OPUS_CLEAR(in, frame_size+overlap); - preemph_mem[c] = 0; - } - } -#endif - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, - overlap, celt_mode->maxLM-LM, 1, arch); - if (upsample != 1) - { - int bound = len; - for (i=0;i=0;i--) - bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT)); - if (pos[c]==1) - { - for (i=0;i<21;i++) - maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]); - } else if (pos[c]==3) - { - for (i=0;i<21;i++) - maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]); - } else if (pos[c]==2) - { - for (i=0;i<21;i++) - { - maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); - maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); - } - } -#if 0 - for (i=0;i<21;i++) - printf("%f ", bandLogE[21*c+i]); - float sum=0; - for (i=0;i<21;i++) - sum += bandLogE[21*c+i]; - printf("%f ", sum/21); -#endif - OPUS_COPY(mem+c*overlap, in+frame_size, overlap); - } - for (i=0;i<21;i++) - maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]); - channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1))); - for (c=0;c<3;c++) - for (i=0;i<21;i++) - maskLogE[c][i] += channel_offset; -#if 0 - for (c=0;c<3;c++) - { - for (i=0;i<21;i++) - printf("%f ", maskLogE[c][i]); - } -#endif - for (c=0;cnb_streams||nb_coupled_streams<0)return 0; - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - return align(sizeof(OpusMSEncoder)) - + nb_coupled_streams * align(coupled_size) - + (nb_streams-nb_coupled_streams) * align(mono_size); -} - -opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family) -{ - int nb_streams; - int nb_coupled_streams; - opus_int32 size; - - if (mapping_family==0) - { - if (channels==1) - { - nb_streams=1; - nb_coupled_streams=0; - } else if (channels==2) - { - nb_streams=1; - nb_coupled_streams=1; - } else - return 0; - } else if (mapping_family==1 && channels<=8 && channels>=1) - { - nb_streams=vorbis_mappings[channels-1].nb_streams; - nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; - } else if (mapping_family==255) - { - nb_streams=channels; - nb_coupled_streams=0; -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - } else if (mapping_family==254) - { - nb_streams=channels; - nb_coupled_streams=0; -#endif - } else - return 0; - size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); - if (channels>2) - { - size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32)); - } - return size; -} - -static int opus_multistream_encoder_init_impl( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application, - MappingType mapping_type -) -{ - int coupled_size; - int mono_size; - int i, ret; - char *ptr; - - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - return OPUS_BAD_ARG; - - st->arch = opus_select_arch(); - st->layout.nb_channels = channels; - st->layout.nb_streams = streams; - st->layout.nb_coupled_streams = coupled_streams; - st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; - if (mapping_type != MAPPING_TYPE_SURROUND) - st->lfe_stream = -1; - st->bitrate_bps = OPUS_AUTO; - st->application = application; - st->variable_duration = OPUS_FRAMESIZE_ARG; - for (i=0;ilayout.nb_channels;i++) - st->layout.mapping[i] = mapping[i]; - if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout)) - return OPUS_BAD_ARG; - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - - for (i=0;ilayout.nb_coupled_streams;i++) - { - ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application); - if(ret!=OPUS_OK)return ret; - if (i==st->lfe_stream) - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); - ptr += align(coupled_size); - } - for (;ilayout.nb_streams;i++) - { - ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application); - if (i==st->lfe_stream) - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); - if(ret!=OPUS_OK)return ret; - ptr += align(mono_size); - } - if (mapping_type == MAPPING_TYPE_SURROUND) - { - OPUS_CLEAR(ms_get_preemph_mem(st), channels); - OPUS_CLEAR(ms_get_window_mem(st), channels*120); - } - st->mapping_type = mapping_type; - return OPUS_OK; -} - -int opus_multistream_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application -) -{ - return opus_multistream_encoder_init_impl(st, Fs, channels, streams, - coupled_streams, mapping, - application, MAPPING_TYPE_NONE); -} - -int opus_multistream_surround_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application -) -{ - MappingType mapping_type; - - if ((channels>255) || (channels<1)) - return OPUS_BAD_ARG; - st->lfe_stream = -1; - if (mapping_family==0) - { - if (channels==1) - { - *streams=1; - *coupled_streams=0; - mapping[0]=0; - } else if (channels==2) - { - *streams=1; - *coupled_streams=1; - mapping[0]=0; - mapping[1]=1; - } else - return OPUS_UNIMPLEMENTED; - } else if (mapping_family==1 && channels<=8 && channels>=1) - { - int i; - *streams=vorbis_mappings[channels-1].nb_streams; - *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; - for (i=0;i=6) - st->lfe_stream = *streams-1; - } else if (mapping_family==255) - { - int i; - *streams=channels; - *coupled_streams=0; - for(i=0;i2 && mapping_family==1) { - mapping_type = MAPPING_TYPE_SURROUND; -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - } else if (mapping_family==254) - { - mapping_type = MAPPING_TYPE_AMBISONICS; -#endif - } else - { - mapping_type = MAPPING_TYPE_NONE; - } - return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, - *coupled_streams, mapping, - application, mapping_type); -} - -OpusMSEncoder *opus_multistream_encoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application, - int *error -) -{ - int ret; - OpusMSEncoder *st; - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusMSEncoder *)opus_alloc(opus_multistream_encoder_get_size(streams, coupled_streams)); - if (st==NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_multistream_encoder_init(st, Fs, channels, streams, coupled_streams, mapping, application); - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} - -OpusMSEncoder *opus_multistream_surround_encoder_create( - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application, - int *error -) -{ - int ret; - opus_int32 size; - OpusMSEncoder *st; - if ((channels>255) || (channels<1)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - size = opus_multistream_surround_encoder_get_size(channels, mapping_family); - if (!size) - { - if (error) - *error = OPUS_UNIMPLEMENTED; - return NULL; - } - st = (OpusMSEncoder *)opus_alloc(size); - if (st==NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application); - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} - -static void surround_rate_allocation( - OpusMSEncoder *st, - opus_int32 *rate, - int frame_size, - opus_int32 Fs - ) -{ - int i; - opus_int32 channel_rate; - int stream_offset; - int lfe_offset; - int coupled_ratio; /* Q8 */ - int lfe_ratio; /* Q8 */ - - if (st->bitrate_bps > st->layout.nb_channels*40000) - stream_offset = 20000; - else - stream_offset = st->bitrate_bps/st->layout.nb_channels/2; - stream_offset += 60*(Fs/frame_size-50); - /* We start by giving each stream (coupled or uncoupled) the same bitrate. - This models the main saving of coupled channels over uncoupled. */ - /* The LFE stream is an exception to the above and gets fewer bits. */ - lfe_offset = 3500 + 60*(Fs/frame_size-50); - /* Coupled streams get twice the mono rate after the first 20 kb/s. */ - coupled_ratio = 512; - /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */ - lfe_ratio = 32; - - /* Compute bitrate allocation between streams */ - if (st->bitrate_bps==OPUS_AUTO) - { - channel_rate = Fs+60*Fs/frame_size; - } else if (st->bitrate_bps==OPUS_BITRATE_MAX) - { - channel_rate = 300000; - } else { - int nb_lfe; - int nb_uncoupled; - int nb_coupled; - int total; - nb_lfe = (st->lfe_stream!=-1); - nb_coupled = st->layout.nb_coupled_streams; - nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe; - total = (nb_uncoupled<<8) /* mono */ - + coupled_ratio*nb_coupled /* stereo */ - + nb_lfe*lfe_ratio; - channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total; - } -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus; - bonus = 60*(Fs/frame_size-50); - channel_rate += bonus; - } -#endif - - for (i=0;ilayout.nb_streams;i++) - { - if (ilayout.nb_coupled_streams) - rate[i] = stream_offset+(channel_rate*coupled_ratio>>8); - else if (i!=st->lfe_stream) - rate[i] = stream_offset+channel_rate; - else - rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); - } -} - -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS -static void ambisonics_rate_allocation( - OpusMSEncoder *st, - opus_int32 *rate, - int frame_size, - opus_int32 Fs - ) -{ - int i; - int non_mono_rate; - int total_rate; - - /* The mono channel gets (rate_ratio_num / rate_ratio_den) times as many bits - * as all other channels */ - const int rate_ratio_num = 4; - const int rate_ratio_den = 3; - const int num_channels = st->layout.nb_streams; - - if (st->bitrate_bps==OPUS_AUTO) - { - total_rate = num_channels * (20000 + st->layout.nb_streams*(Fs+60*Fs/frame_size)); - } else if (st->bitrate_bps==OPUS_BITRATE_MAX) - { - total_rate = num_channels * 320000; - } else { - total_rate = st->bitrate_bps; - } - - /* Let y be the non-mono rate and let p, q be integers such that the mono - * channel rate is (p/q) * y. - * Also let T be the total bitrate to allocate. Then - * (n - 1) y + (p/q) y = T - * y = (T q) / (qn - q + p) - */ - non_mono_rate = - total_rate * rate_ratio_den - / (rate_ratio_den*num_channels + rate_ratio_num - rate_ratio_den); - -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus = 60*(Fs/frame_size-50); - non_mono_rate += bonus; - } -#endif - - rate[0] = total_rate - (num_channels - 1) * non_mono_rate; - for (i=1;ilayout.nb_streams;i++) - { - rate[i] = non_mono_rate; - } -} -#endif /* ENABLE_EXPERIMENTAL_AMBISONICS */ - -static opus_int32 rate_allocation( - OpusMSEncoder *st, - opus_int32 *rate, - int frame_size - ) -{ - int i; - opus_int32 rate_sum=0; - opus_int32 Fs; - char *ptr; - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); - -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - if (st->mapping_type == MAPPING_TYPE_AMBISONICS) { - ambisonics_rate_allocation(st, rate, frame_size, Fs); - } else -#endif - { - surround_rate_allocation(st, rate, frame_size, Fs); - } - - for (i=0;ilayout.nb_streams;i++) - { - rate[i] = IMAX(rate[i], 500); - rate_sum += rate[i]; - } - return rate_sum; -} - -/* Max size in case the encoder decides to return three frames */ -#define MS_FRAME_TMP (3*1275+7) -static int opus_multistream_encode_native -( - OpusMSEncoder *st, - opus_copy_channel_in_func copy_channel_in, - const void *pcm, - int analysis_frame_size, - unsigned char *data, - opus_int32 max_data_bytes, - int lsb_depth, - downmix_func downmix, - int float_api -) -{ - opus_int32 Fs; - int coupled_size; - int mono_size; - int s; - char *ptr; - int tot_size; - VARDECL(opus_val16, buf); - VARDECL(opus_val16, bandSMR); - unsigned char tmp_data[MS_FRAME_TMP]; - OpusRepacketizer rp; - opus_int32 vbr; - const CELTMode *celt_mode; - opus_int32 bitrates[256]; - opus_val16 bandLogE[42]; - opus_val32 *mem = NULL; - opus_val32 *preemph_mem=NULL; - int frame_size; - opus_int32 rate_sum; - opus_int32 smallest_packet; - ALLOC_STACK; - - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - preemph_mem = ms_get_preemph_mem(st); - mem = ms_get_window_mem(st); - } - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr)); - opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode)); - - { - opus_int32 delay_compensation; - int channels; - - channels = st->layout.nb_streams + st->layout.nb_coupled_streams; - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); - delay_compensation -= Fs/400; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, channels, Fs, st->bitrate_bps, - delay_compensation, downmix -#ifndef DISABLE_FLOAT_API - , st->subframe_mem -#endif - ); - } - - if (400*frame_size < Fs) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - /* Validate frame_size before using it to allocate stack space. - This mirrors the checks in opus_encode[_float](). */ - if (400*frame_size != Fs && 200*frame_size != Fs && - 100*frame_size != Fs && 50*frame_size != Fs && - 25*frame_size != Fs && 50*frame_size != 3*Fs) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - /* Smallest packet the encoder can produce. */ - smallest_packet = st->layout.nb_streams*2-1; - if (max_data_bytes < smallest_packet) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - ALLOC(buf, 2*frame_size, opus_val16); - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - - ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch); - } - - /* Compute bitrate allocation between streams (this could be a lot better) */ - rate_sum = rate_allocation(st, bitrates, frame_size); - - if (!vbr) - { - if (st->bitrate_bps == OPUS_AUTO) - { - max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size)); - } else if (st->bitrate_bps != OPUS_BITRATE_MAX) - { - max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet, - 3*st->bitrate_bps/(3*8*Fs/frame_size))); - } - } - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;slayout.nb_streams;s++) - { - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - opus_int32 equiv_rate; - equiv_rate = st->bitrate_bps; - if (frame_size*50 < Fs) - equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels; - if (equiv_rate > 10000*st->layout.nb_channels) - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); - else if (equiv_rate > 7000*st->layout.nb_channels) - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); - else if (equiv_rate > 5000*st->layout.nb_channels) - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); - else - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); - if (s < st->layout.nb_coupled_streams) - { - /* To preserve the spatial image, force stereo CELT on coupled streams */ - opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); - } - } -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - else if (st->mapping_type == MAPPING_TYPE_AMBISONICS) { - opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - } -#endif - } - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - /* Counting ToC */ - tot_size = 0; - for (s=0;slayout.nb_streams;s++) - { - OpusEncoder *enc; - int len; - int curr_max; - int c1, c2; - int ret; - - opus_repacketizer_init(&rp); - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - { - int i; - int left, right; - left = get_left_channel(&st->layout, s, -1); - right = get_right_channel(&st->layout, s, -1); - (*copy_channel_in)(buf, 2, - pcm, st->layout.nb_channels, left, frame_size); - (*copy_channel_in)(buf+1, 2, - pcm, st->layout.nb_channels, right, frame_size); - ptr += align(coupled_size); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - for (i=0;i<21;i++) - { - bandLogE[i] = bandSMR[21*left+i]; - bandLogE[21+i] = bandSMR[21*right+i]; - } - } - c1 = left; - c2 = right; - } else { - int i; - int chan = get_mono_channel(&st->layout, s, -1); - (*copy_channel_in)(buf, 1, - pcm, st->layout.nb_channels, chan, frame_size); - ptr += align(mono_size); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - for (i=0;i<21;i++) - bandLogE[i] = bandSMR[21*chan+i]; - } - c1 = chan; - c2 = -1; - } - if (st->mapping_type == MAPPING_TYPE_SURROUND) - opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); - /* number of bytes left (+Toc) */ - curr_max = max_data_bytes - tot_size; - /* Reserve one byte for the last stream and two for the others */ - curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1); - curr_max = IMIN(curr_max,MS_FRAME_TMP); - /* Repacketizer will add one or two bytes for self-delimited frames */ - if (s != st->layout.nb_streams-1) curr_max -= curr_max>253 ? 2 : 1; - if (!vbr && s == st->layout.nb_streams-1) - opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size))); - len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, - pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api); - if (len<0) - { - RESTORE_STACK; - return len; - } - /* We need to use the repacketizer to add the self-delimiting lengths - while taking into account the fact that the encoder can now return - more than one frame at a time (e.g. 60 ms CELT-only) */ - ret = opus_repacketizer_cat(&rp, tmp_data, len); - /* If the opus_repacketizer_cat() fails, then something's seriously wrong - with the encoder. */ - if (ret != OPUS_OK) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp), - data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1); - data += len; - tot_size += len; - } - /*printf("\n");*/ - RESTORE_STACK; - return tot_size; -} - -#if !defined(DISABLE_FLOAT_API) -static void opus_copy_channel_in_float( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -) -{ - const float *float_src; - opus_int32 i; - float_src = (const float *)src; - for (i=0;ibitrate_bps = value; - } - break; - case OPUS_GET_BITRATE_REQUEST: - { - int s; - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = 0; - for (s=0;slayout.nb_streams;s++) - { - opus_int32 rate; - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - opus_encoder_ctl(enc, request, &rate); - *value += rate; - } - } - break; - case OPUS_GET_LSB_DEPTH_REQUEST: - case OPUS_GET_VBR_REQUEST: - case OPUS_GET_APPLICATION_REQUEST: - case OPUS_GET_BANDWIDTH_REQUEST: - case OPUS_GET_COMPLEXITY_REQUEST: - case OPUS_GET_PACKET_LOSS_PERC_REQUEST: - case OPUS_GET_DTX_REQUEST: - case OPUS_GET_VOICE_RATIO_REQUEST: - case OPUS_GET_VBR_CONSTRAINT_REQUEST: - case OPUS_GET_SIGNAL_REQUEST: - case OPUS_GET_LOOKAHEAD_REQUEST: - case OPUS_GET_SAMPLE_RATE_REQUEST: - case OPUS_GET_INBAND_FEC_REQUEST: - case OPUS_GET_FORCE_CHANNELS_REQUEST: - case OPUS_GET_PREDICTION_DISABLED_REQUEST: - { - OpusEncoder *enc; - /* For int32* GET params, just query the first stream */ - opus_int32 *value = va_arg(ap, opus_int32*); - enc = (OpusEncoder*)ptr; - ret = opus_encoder_ctl(enc, request, value); - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - int s; - opus_uint32 *value = va_arg(ap, opus_uint32*); - opus_uint32 tmp; - if (!value) - { - goto bad_arg; - } - *value=0; - for (s=0;slayout.nb_streams;s++) - { - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_encoder_ctl(enc, request, &tmp); - if (ret != OPUS_OK) break; - *value ^= tmp; - } - } - break; - case OPUS_SET_LSB_DEPTH_REQUEST: - case OPUS_SET_COMPLEXITY_REQUEST: - case OPUS_SET_VBR_REQUEST: - case OPUS_SET_VBR_CONSTRAINT_REQUEST: - case OPUS_SET_MAX_BANDWIDTH_REQUEST: - case OPUS_SET_BANDWIDTH_REQUEST: - case OPUS_SET_SIGNAL_REQUEST: - case OPUS_SET_APPLICATION_REQUEST: - case OPUS_SET_INBAND_FEC_REQUEST: - case OPUS_SET_PACKET_LOSS_PERC_REQUEST: - case OPUS_SET_DTX_REQUEST: - case OPUS_SET_FORCE_MODE_REQUEST: - case OPUS_SET_FORCE_CHANNELS_REQUEST: - case OPUS_SET_PREDICTION_DISABLED_REQUEST: - { - int s; - /* This works for int32 params */ - opus_int32 value = va_arg(ap, opus_int32); - for (s=0;slayout.nb_streams;s++) - { - OpusEncoder *enc; - - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_encoder_ctl(enc, request, value); - if (ret != OPUS_OK) - break; - } - } - break; - case OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST: - { - int s; - opus_int32 stream_id; - OpusEncoder **value; - stream_id = va_arg(ap, opus_int32); - if (stream_id<0 || stream_id >= st->layout.nb_streams) - ret = OPUS_BAD_ARG; - value = va_arg(ap, OpusEncoder**); - if (!value) - { - goto bad_arg; - } - for (s=0;slayout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - *value = (OpusEncoder*)ptr; - } - break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->variable_duration = value; - } - break; - case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->variable_duration; - } - break; - case OPUS_RESET_STATE: - { - int s; - st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0; - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels); - OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120); - } - for (s=0;slayout.nb_streams;s++) - { - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_encoder_ctl(enc, OPUS_RESET_STATE); - if (ret != OPUS_OK) - break; - } - } - break; - default: - ret = OPUS_UNIMPLEMENTED; - break; - } - - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - -void opus_multistream_encoder_destroy(OpusMSEncoder *st) -{ - opus_free(st); -} diff --git a/thirdparty/opus/opus_private.h b/thirdparty/opus/opus_private.h deleted file mode 100644 index 3b62eed096..0000000000 --- a/thirdparty/opus/opus_private.h +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2012 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#ifndef OPUS_PRIVATE_H -#define OPUS_PRIVATE_H - -#include "arch.h" -#include "opus.h" -#include "celt.h" - -#include /* offsetof */ - -struct OpusRepacketizer { - unsigned char toc; - int nb_frames; - const unsigned char *frames[48]; - opus_int16 len[48]; - int framesize; -}; - -typedef struct ChannelLayout { - int nb_channels; - int nb_streams; - int nb_coupled_streams; - unsigned char mapping[256]; -} ChannelLayout; - -int validate_layout(const ChannelLayout *layout); -int get_left_channel(const ChannelLayout *layout, int stream_id, int prev); -int get_right_channel(const ChannelLayout *layout, int stream_id, int prev); -int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); - - - -#define MODE_SILK_ONLY 1000 -#define MODE_HYBRID 1001 -#define MODE_CELT_ONLY 1002 - -#define OPUS_SET_VOICE_RATIO_REQUEST 11018 -#define OPUS_GET_VOICE_RATIO_REQUEST 11019 - -/** Configures the encoder's expected percentage of voice - * opposed to music or other signals. - * - * @note This interface is currently more aspiration than actuality. It's - * ultimately expected to bias an automatic signal classifier, but it currently - * just shifts the static bitrate to mode mapping around a little bit. - * - * @param[in] x int: Voice percentage in the range 0-100, inclusive. - * @hideinitializer */ -#define OPUS_SET_VOICE_RATIO(x) OPUS_SET_VOICE_RATIO_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured voice ratio value, @see OPUS_SET_VOICE_RATIO - * - * @param[out] x int*: Voice percentage in the range 0-100, inclusive. - * @hideinitializer */ -#define OPUS_GET_VOICE_RATIO(x) OPUS_GET_VOICE_RATIO_REQUEST, __opus_check_int_ptr(x) - - -#define OPUS_SET_FORCE_MODE_REQUEST 11002 -#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) - -typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); -void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); -void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); - -int encode_size(int size, unsigned char *data); - -opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); - -opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, - int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, downmix_func downmix -#ifndef DISABLE_FLOAT_API - , float *subframe_mem -#endif - ); - -opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, - int analysis_channels, downmix_func downmix, int float_api); - -int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, - opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, - opus_int32 *packet_offset, int soft_clip); - -/* Make sure everything is properly aligned. */ -static OPUS_INLINE int align(int i) -{ - struct foo {char c; union { void* p; opus_int32 i; opus_val32 v; } u;}; - - unsigned int alignment = offsetof(struct foo, u); - - /* Optimizing compilers should optimize div and multiply into and - for all sensible alignment values. */ - return ((i + alignment - 1) / alignment) * alignment; -} - -int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, - int self_delimited, unsigned char *out_toc, - const unsigned char *frames[48], opus_int16 size[48], - int *payload_offset, opus_int32 *packet_offset); - -opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, - unsigned char *data, opus_int32 maxlen, int self_delimited, int pad); - -int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len); - -#endif /* OPUS_PRIVATE_H */ diff --git a/thirdparty/opus/opusfile.c b/thirdparty/opus/opusfile.c deleted file mode 100644 index b8b3a354cf..0000000000 --- a/thirdparty/opus/opusfile.c +++ /dev/null @@ -1,3266 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $ - - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" -#include -#include -#include -#include -#include -#include - -#include "opusfile.h" - -/*This implementation is largely based off of libvorbisfile. - All of the Ogg bits work roughly the same, though I have made some - "improvements" that have not been folded back there, yet.*/ - -/*A 'chained bitstream' is an Ogg Opus bitstream that contains more than one - logical bitstream arranged end to end (the only form of Ogg multiplexing - supported by this library. - Grouping (parallel multiplexing) is not supported, except to the extent that - if there are multiple logical Ogg streams in a single link of the chain, we - will ignore all but the first Opus stream we find.*/ - -/*An Ogg Opus file can be played beginning to end (streamed) without worrying - ahead of time about chaining (see opusdec from the opus-tools package). - If we have the whole file, however, and want random access - (seeking/scrubbing) or desire to know the total length/time of a file, we - need to account for the possibility of chaining.*/ - -/*We can handle things a number of ways. - We can determine the entire bitstream structure right off the bat, or find - pieces on demand. - This library determines and caches structure for the entire bitstream, but - builds a virtual decoder on the fly when moving between links in the chain.*/ - -/*There are also different ways to implement seeking. - Enough information exists in an Ogg bitstream to seek to sample-granularity - positions in the output. - Or, one can seek by picking some portion of the stream roughly in the desired - area if we only want coarse navigation through the stream. - We implement and expose both strategies.*/ - -/*The maximum number of bytes in a page (including the page headers).*/ -#define OP_PAGE_SIZE_MAX (65307) -/*The default amount to seek backwards per step when trying to find the - previous page. - This must be at least as large as the maximum size of a page.*/ -#define OP_CHUNK_SIZE (65536) -/*The maximum amount to seek backwards per step when trying to find the - previous page.*/ -#define OP_CHUNK_SIZE_MAX (1024*(opus_int32)1024) -/*A smaller read size is needed for low-rate streaming.*/ -#define OP_READ_SIZE (2048) - -int op_test(OpusHead *_head, - const unsigned char *_initial_data,size_t _initial_bytes){ - ogg_sync_state oy; - char *data; - int err; - /*The first page of a normal Opus file will be at most 57 bytes (27 Ogg - page header bytes + 1 lacing value + 21 Opus header bytes + 8 channel - mapping bytes). - It will be at least 47 bytes (27 Ogg page header bytes + 1 lacing value + - 19 Opus header bytes using channel mapping family 0). - If we don't have at least that much data, give up now.*/ - if(_initial_bytes<47)return OP_FALSE; - /*Only proceed if we start with the magic OggS string. - This is to prevent us spending a lot of time allocating memory and looking - for Ogg pages in non-Ogg files.*/ - if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT; - ogg_sync_init(&oy); - data=ogg_sync_buffer(&oy,_initial_bytes); - if(data!=NULL){ - ogg_stream_state os; - ogg_page og; - int ret; - memcpy(data,_initial_data,_initial_bytes); - ogg_sync_wrote(&oy,_initial_bytes); - ogg_stream_init(&os,-1); - err=OP_FALSE; - do{ - ogg_packet op; - ret=ogg_sync_pageout(&oy,&og); - /*Ignore holes.*/ - if(ret<0)continue; - /*Stop if we run out of data.*/ - if(!ret)break; - ogg_stream_reset_serialno(&os,ogg_page_serialno(&og)); - ogg_stream_pagein(&os,&og); - /*Only process the first packet on this page (if it's a BOS packet, - it's required to be the only one).*/ - if(ogg_stream_packetout(&os,&op)==1){ - if(op.b_o_s){ - ret=opus_head_parse(_head,op.packet,op.bytes); - /*If this didn't look like Opus, keep going.*/ - if(ret==OP_ENOTFORMAT)continue; - /*Otherwise we're done, one way or another.*/ - err=ret; - } - /*We finished parsing the headers. - There is no Opus to be found.*/ - else err=OP_ENOTFORMAT; - } - } - while(err==OP_FALSE); - ogg_stream_clear(&os); - } - else err=OP_EFAULT; - ogg_sync_clear(&oy); - return err; -} - -/*Many, many internal helpers. - The intention is not to be confusing. - Rampant duplication and monolithic function implementation (though we do have - some large, omnibus functions still) would be harder to understand anyway. - The high level functions are last. - Begin grokking near the end of the file if you prefer to read things - top-down.*/ - -/*The read/seek functions track absolute position within the stream.*/ - -/*Read a little more data from the file/pipe into the ogg_sync framer. - _nbytes: The maximum number of bytes to read. - Return: A positive number of bytes read on success, 0 on end-of-file, or a - negative value on failure.*/ -static int op_get_data(OggOpusFile *_of,int _nbytes){ - unsigned char *buffer; - int nbytes; - OP_ASSERT(_nbytes>0); - buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes); - nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes); - OP_ASSERT(nbytes<=_nbytes); - if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes); - return nbytes; -} - -/*Save a tiny smidge of verbosity to make the code more readable.*/ -static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){ - if(_offset==_of->offset)return 0; - if(_of->callbacks.seek==NULL - ||(*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)){ - return OP_EREAD; - } - _of->offset=_offset; - ogg_sync_reset(&_of->oy); - return 0; -} - -/*Get the current position indicator of the underlying source. - This should be the same as the value reported by tell().*/ -static opus_int64 op_position(const OggOpusFile *_of){ - /*The current position indicator is _not_ simply offset. - We may also have unprocessed, buffered data in the sync state.*/ - return _of->offset+_of->oy.fill-_of->oy.returned; -} - -/*From the head of the stream, get the next page. - _boundary specifies if the function is allowed to fetch more data from the - stream (and how much) or only use internally buffered data. - _boundary: -1: Unbounded search. - 0: Read no additional data. - Use only cached data. - n: Search for the start of a new page up to file position n. - Return: n>=0: Found a page at absolute offset n. - OP_FALSE: Hit the _boundary limit. - OP_EREAD: An underlying read operation failed. - OP_BADLINK: We hit end-of-file before reaching _boundary.*/ -static opus_int64 op_get_next_page(OggOpusFile *_of,ogg_page *_og, - opus_int64 _boundary){ - while(_boundary<=0||_of->offset<_boundary){ - int more; - more=ogg_sync_pageseek(&_of->oy,_og); - /*Skipped (-more) bytes.*/ - if(OP_UNLIKELY(more<0))_of->offset-=more; - else if(more==0){ - int read_nbytes; - int ret; - /*Send more paramedics.*/ - if(!_boundary)return OP_FALSE; - if(_boundary<0)read_nbytes=OP_READ_SIZE; - else{ - opus_int64 position; - position=op_position(_of); - if(position>=_boundary)return OP_FALSE; - read_nbytes=(int)OP_MIN(_boundary-position,OP_READ_SIZE); - } - ret=op_get_data(_of,read_nbytes); - if(OP_UNLIKELY(ret<0))return OP_EREAD; - if(OP_UNLIKELY(ret==0)){ - /*Only fail cleanly on EOF if we didn't have a known boundary. - Otherwise, we should have been able to reach that boundary, and this - is a fatal error.*/ - return OP_UNLIKELY(_boundary<0)?OP_FALSE:OP_EBADLINK; - } - } - else{ - /*Got a page. - Return the page start offset and advance the internal offset past the - page end.*/ - opus_int64 page_offset; - page_offset=_of->offset; - _of->offset+=more; - OP_ASSERT(page_offset>=0); - return page_offset; - } - } - return OP_FALSE; -} - -static int op_add_serialno(const ogg_page *_og, - ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){ - ogg_uint32_t *serialnos; - int nserialnos; - int cserialnos; - ogg_uint32_t s; - s=ogg_page_serialno(_og); - serialnos=*_serialnos; - nserialnos=*_nserialnos; - cserialnos=*_cserialnos; - if(OP_UNLIKELY(nserialnos>=cserialnos)){ - if(OP_UNLIKELY(cserialnos>INT_MAX/(int)sizeof(*serialnos)-1>>1)){ - return OP_EFAULT; - } - cserialnos=2*cserialnos+1; - OP_ASSERT(nserialnos=OP_PAGE_SIZE_MAX); - begin=OP_MAX(begin-chunk_size,0); - ret=op_seek_helper(_of,begin); - if(OP_UNLIKELY(ret<0))return ret; - search_start=begin; - while(_of->offsetsearch_start=search_start; - _sr->offset=_offset=llret; - _sr->serialno=serialno; - OP_ASSERT(_of->offset-_offset>=0); - OP_ASSERT(_of->offset-_offset<=OP_PAGE_SIZE_MAX); - _sr->size=(opus_int32)(_of->offset-_offset); - _sr->gp=ogg_page_granulepos(&og); - /*If this page is from the stream we're looking for, remember it.*/ - if(serialno==_serialno){ - preferred_found=1; - *&preferred_sr=*_sr; - } - if(!op_lookup_serialno(serialno,_serialnos,_nserialnos)){ - /*We fell off the end of the link, which means we seeked back too far - and shouldn't have been looking in that link to begin with. - If we found the preferred serial number, forget that we saw it.*/ - preferred_found=0; - } - search_start=llret+1; - } - /*We started from the beginning of the stream and found nothing. - This should be impossible unless the contents of the source changed out - from under us after we read from it.*/ - if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK; - /*Bump up the chunk size. - This is mildly helpful when seeks are very expensive (http).*/ - chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); - /*Avoid quadratic complexity if we hit an invalid patch of the file.*/ - end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end); - } - while(_offset<0); - if(preferred_found)*_sr=*&preferred_sr; - return 0; -} - -/*Find the last page beginning before _offset with the given serial number and - a valid granule position. - Unlike the above search, this continues until it finds such a page, but does - not stray outside the current link. - We could implement it (inefficiently) by calling op_get_prev_page_serial() - repeatedly until it returned a page that had both our preferred serial - number and a valid granule position, but doing it with a separate function - allows us to avoid repeatedly re-scanning valid pages from other streams as - we seek-back-and-read-forward. - [out] _gp: Returns the granule position of the page that was found on - success. - _offset: The _offset before which to find a page. - Any page returned will consist of data entirely before _offset. - _serialno: The target serial number. - _serialnos: The list of serial numbers in the link that contains the - preferred serial number. - _nserialnos: The number of serial numbers in the current link. - Return: The offset of the page on success, or a negative value on failure. - OP_EREAD: Failed to read more data (error or EOF). - OP_EBADLINK: We couldn't find a page even after seeking back past the - beginning of the link.*/ -static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp, - opus_int64 _offset,ogg_uint32_t _serialno, - const ogg_uint32_t *_serialnos,int _nserialnos){ - ogg_page og; - ogg_int64_t gp; - opus_int64 begin; - opus_int64 end; - opus_int64 original_end; - opus_int32 chunk_size; - /*The target serial number must belong to the current link.*/ - OP_ASSERT(op_lookup_serialno(_serialno,_serialnos,_nserialnos)); - original_end=end=begin=_offset; - _offset=-1; - /*We shouldn't have to initialize gp, but gcc is too dumb to figure out that - ret>=0 implies we entered the if(page_gp!=-1) block at least once.*/ - gp=-1; - chunk_size=OP_CHUNK_SIZE; - do{ - int left_link; - int ret; - OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX); - begin=OP_MAX(begin-chunk_size,0); - ret=op_seek_helper(_of,begin); - if(OP_UNLIKELY(ret<0))return ret; - left_link=0; - while(_of->offsetready_stateos,ogg_page_serialno(_og)); - ogg_stream_pagein(&_of->os,_og); - if(OP_LIKELY(ogg_stream_packetout(&_of->os,&op)>0)){ - ret=opus_head_parse(_head,op.packet,op.bytes); - /*Found a valid Opus header. - Continue setup.*/ - if(OP_LIKELY(ret>=0))_of->ready_state=OP_STREAMSET; - /*If it's just a stream type we don't recognize, ignore it. - Everything else is fatal.*/ - else if(ret!=OP_ENOTFORMAT)return ret; - } - /*TODO: Should a BOS page with no packets be an error?*/ - } - /*Get the next page. - No need to clamp the boundary offset against _of->end, as all errors - become OP_ENOTFORMAT or OP_EBADHEADER.*/ - if(OP_UNLIKELY(op_get_next_page(_of,_og, - OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){ - return _of->ready_stateready_state!=OP_STREAMSET))return OP_ENOTFORMAT; - /*If the first non-header page belonged to our Opus stream, submit it.*/ - if(_of->os.serialno==ogg_page_serialno(_og))ogg_stream_pagein(&_of->os,_og); - /*Loop getting packets.*/ - for(;;){ - switch(ogg_stream_packetout(&_of->os,&op)){ - case 0:{ - /*Loop getting pages.*/ - for(;;){ - /*No need to clamp the boundary offset against _of->end, as all - errors become OP_EBADHEADER.*/ - if(OP_UNLIKELY(op_get_next_page(_of,_og, - OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){ - return OP_EBADHEADER; - } - /*If this page belongs to the correct stream, go parse it.*/ - if(_of->os.serialno==ogg_page_serialno(_og)){ - ogg_stream_pagein(&_of->os,_og); - break; - } - /*If the link ends before we see the Opus comment header, abort.*/ - if(OP_UNLIKELY(ogg_page_bos(_og)))return OP_EBADHEADER; - /*Otherwise, keep looking.*/ - } - }break; - /*We shouldn't get a hole in the headers!*/ - case -1:return OP_EBADHEADER; - default:{ - /*Got a packet. - It should be the comment header.*/ - ret=opus_tags_parse(_tags,op.packet,op.bytes); - if(OP_UNLIKELY(ret<0))return ret; - /*Make sure the page terminated at the end of the comment header. - If there is another packet on the page, or part of a packet, then - reject the stream. - Otherwise seekable sources won't be able to seek back to the start - properly.*/ - ret=ogg_stream_packetout(&_of->os,&op); - if(OP_UNLIKELY(ret!=0) - ||OP_UNLIKELY(_og->header[_og->header_len-1]==255)){ - /*If we fail, the caller assumes our tags are uninitialized.*/ - opus_tags_clear(_tags); - return OP_EBADHEADER; - } - return 0; - } - } - } -} - -static int op_fetch_headers(OggOpusFile *_of,OpusHead *_head, - OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos, - int *_cserialnos,ogg_page *_og){ - ogg_page og; - int ret; - if(!_og){ - /*No need to clamp the boundary offset against _of->end, as all errors - become OP_ENOTFORMAT.*/ - if(OP_UNLIKELY(op_get_next_page(_of,&og, - OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){ - return OP_ENOTFORMAT; - } - _og=&og; - } - _of->ready_state=OP_OPENED; - ret=op_fetch_headers_impl(_of,_head,_tags,_serialnos,_nserialnos, - _cserialnos,_og); - /*Revert back from OP_STREAMSET to OP_OPENED on failure, to prevent - double-free of the tags in an unseekable stream.*/ - if(OP_UNLIKELY(ret<0))_of->ready_state=OP_OPENED; - return ret; -} - -/*Granule position manipulation routines. - A granule position is defined to be an unsigned 64-bit integer, with the - special value -1 in two's complement indicating an unset or invalid granule - position. - We are not guaranteed to have an unsigned 64-bit type, so we construct the - following routines that - a) Properly order negative numbers as larger than positive numbers, and - b) Check for underflow or overflow past the special -1 value. - This lets us operate on the full, valid range of granule positions in a - consistent and safe manner. - This full range is organized into distinct regions: - [ -1 (invalid) ][ 0 ... OP_INT64_MAX ][ OP_INT64_MIN ... -2 ][-1 (invalid) ] - - No one should actually use granule positions so large that they're negative, - even if they are technically valid, as very little software handles them - correctly (including most of Xiph.Org's). - This library also refuses to support durations so large they won't fit in a - signed 64-bit integer (to avoid exposing this mess to the application, and - to simplify a good deal of internal arithmetic), so the only way to use them - successfully is if pcm_start is very large. - This means there isn't anything you can do with negative granule positions - that you couldn't have done with purely non-negative ones. - The main purpose of these routines is to allow us to think very explicitly - about the possible failure cases of all granule position manipulations.*/ - -/*Safely adds a small signed integer to a valid (not -1) granule position. - The result can use the full 64-bit range of values (both positive and - negative), but will fail on overflow (wrapping past -1; wrapping past - OP_INT64_MAX is explicitly okay). - [out] _dst_gp: The resulting granule position. - Only modified on success. - _src_gp: The granule position to add to. - This must not be -1. - _delta: The amount to add. - This is allowed to be up to 32 bits to support the maximum - duration of a single Ogg page (255 packets * 120 ms per - packet == 1,468,800 samples at 48 kHz). - Return: 0 on success, or OP_EINVAL if the result would wrap around past -1.*/ -static int op_granpos_add(ogg_int64_t *_dst_gp,ogg_int64_t _src_gp, - opus_int32 _delta){ - /*The code below handles this case correctly, but there's no reason we - should ever be called with these values, so make sure we aren't.*/ - OP_ASSERT(_src_gp!=-1); - if(_delta>0){ - /*Adding this amount to the granule position would overflow its 64-bit - range.*/ - if(OP_UNLIKELY(_src_gp<0)&&OP_UNLIKELY(_src_gp>=-1-_delta))return OP_EINVAL; - if(OP_UNLIKELY(_src_gp>OP_INT64_MAX-_delta)){ - /*Adding this amount to the granule position would overflow the positive - half of its 64-bit range. - Since signed overflow is undefined in C, do it in a way the compiler - isn't allowed to screw up.*/ - _delta-=(opus_int32)(OP_INT64_MAX-_src_gp)+1; - _src_gp=OP_INT64_MIN; - } - } - else if(_delta<0){ - /*Subtracting this amount from the granule position would underflow its - 64-bit range.*/ - if(_src_gp>=0&&OP_UNLIKELY(_src_gp<-_delta))return OP_EINVAL; - if(OP_UNLIKELY(_src_gp da < 0.*/ - da=(OP_INT64_MIN-_gp_a)-1; - /*_gp_b >= 0 => db >= 0.*/ - db=OP_INT64_MAX-_gp_b; - /*Step 2: Check for overflow.*/ - if(OP_UNLIKELY(OP_INT64_MAX+da= 0 => da <= 0*/ - da=_gp_a+OP_INT64_MIN; - /*_gp_b < 0 => db <= 0*/ - db=OP_INT64_MIN-_gp_b; - /*Step 2: Check for overflow.*/ - if(OP_UNLIKELY(da=0)return 1; - /*Else fall through.*/ - } - else if(OP_UNLIKELY(_gp_b<0))return -1; - /*No wrapping case.*/ - return (_gp_a>_gp_b)-(_gp_b>_gp_a); -} - -/*Returns the duration of the packet (in samples at 48 kHz), or a negative - value on error.*/ -static int op_get_packet_duration(const unsigned char *_data,int _len){ - int nframes; - int frame_size; - int nsamples; - nframes=opus_packet_get_nb_frames(_data,_len); - if(OP_UNLIKELY(nframes<0))return OP_EBADPACKET; - frame_size=opus_packet_get_samples_per_frame(_data,48000); - nsamples=nframes*frame_size; - if(OP_UNLIKELY(nsamples>120*48))return OP_EBADPACKET; - return nsamples; -} - -/*This function more properly belongs in info.c, but we define it here to allow - the static granule position manipulation functions to remain static.*/ -ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp){ - opus_int32 pre_skip; - pre_skip=_head->pre_skip; - if(_gp!=-1&&op_granpos_add(&_gp,_gp,-pre_skip))_gp=-1; - return _gp; -} - -/*Grab all the packets currently in the stream state, and compute their - durations. - _of->op_count is set to the number of packets collected. - [out] _durations: Returns the durations of the individual packets. - Return: The total duration of all packets, or OP_HOLE if there was a hole.*/ -static opus_int32 op_collect_audio_packets(OggOpusFile *_of, - int _durations[255]){ - opus_int32 total_duration; - int op_count; - /*Count the durations of all packets in the page.*/ - op_count=0; - total_duration=0; - for(;;){ - int ret; - /*This takes advantage of undocumented libogg behavior that returned - ogg_packet buffers are valid at least until the next page is - submitted. - Relying on this is not too terrible, as _none_ of the Ogg memory - ownership/lifetime rules are well-documented. - But I can read its code and know this will work.*/ - ret=ogg_stream_packetout(&_of->os,_of->op+op_count); - if(!ret)break; - if(OP_UNLIKELY(ret<0)){ - /*We shouldn't get holes in the middle of pages.*/ - OP_ASSERT(op_count==0); - /*Set the return value and break out of the loop. - We want to make sure op_count gets set to 0, because we've ingested a - page, so any previously loaded packets are now invalid.*/ - total_duration=OP_HOLE; - break; - } - /*Unless libogg is broken, we can't get more than 255 packets from a - single page.*/ - OP_ASSERT(op_count<255); - _durations[op_count]=op_get_packet_duration(_of->op[op_count].packet, - _of->op[op_count].bytes); - if(OP_LIKELY(_durations[op_count]>0)){ - /*With at most 255 packets on a page, this can't overflow.*/ - total_duration+=_durations[op_count++]; - } - /*Ignore packets with an invalid TOC sequence.*/ - else if(op_count>0){ - /*But save the granule position, if there was one.*/ - _of->op[op_count-1].granulepos=_of->op[op_count].granulepos; - } - } - _of->op_pos=0; - _of->op_count=op_count; - return total_duration; -} - -/*Starting from current cursor position, get the initial PCM offset of the next - page. - This also validates the granule position on the first page with a completed - audio data packet, as required by the spec. - If this link is completely empty (no pages with completed packets), then this - function sets pcm_start=pcm_end=0 and returns the BOS page of the next link - (if any). - In the seekable case, we initialize pcm_end=-1 before calling this function, - so that later we can detect that the link was empty before calling - op_find_final_pcm_offset(). - [inout] _link: The link for which to find pcm_start. - [out] _og: Returns the BOS page of the next link if this link was empty. - In the unseekable case, we can then feed this to - op_fetch_headers() to start the next link. - The caller may pass NULL (e.g., for seekable streams), in - which case this page will be discarded. - Return: 0 on success, 1 if there is a buffered BOS page available, or a - negative value on unrecoverable error.*/ -static int op_find_initial_pcm_offset(OggOpusFile *_of, - OggOpusLink *_link,ogg_page *_og){ - ogg_page og; - opus_int64 page_offset; - ogg_int64_t pcm_start; - ogg_int64_t prev_packet_gp; - ogg_int64_t cur_page_gp; - ogg_uint32_t serialno; - opus_int32 total_duration; - int durations[255]; - int cur_page_eos; - int op_count; - int pi; - if(_og==NULL)_og=&og; - serialno=_of->os.serialno; - op_count=0; - /*We shouldn't have to initialize total_duration, but gcc is too dumb to - figure out that op_count>0 implies we've been through the whole loop at - least once.*/ - total_duration=0; - do{ - page_offset=op_get_next_page(_of,_og,_of->end); - /*We should get a page unless the file is truncated or mangled. - Otherwise there are no audio data packets in the whole logical stream.*/ - if(OP_UNLIKELY(page_offset<0)){ - /*Fail if there was a read error.*/ - if(page_offsethead.pre_skip>0)return OP_EBADTIMESTAMP; - /*Set pcm_end and end_offset so we can skip the call to - op_find_final_pcm_offset().*/ - _link->pcm_start=_link->pcm_end=0; - _link->end_offset=_link->data_offset; - return 0; - } - /*Similarly, if we hit the next link in the chain, we've gone too far.*/ - if(OP_UNLIKELY(ogg_page_bos(_og))){ - if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP; - /*Set pcm_end and end_offset so we can skip the call to - op_find_final_pcm_offset().*/ - _link->pcm_end=_link->pcm_start=0; - _link->end_offset=_link->data_offset; - /*Tell the caller we've got a buffered page for them.*/ - return 1; - } - /*Ignore pages from other streams (not strictly necessary, because of the - checks in ogg_stream_pagein(), but saves some work).*/ - if(serialno!=(ogg_uint32_t)ogg_page_serialno(_og))continue; - ogg_stream_pagein(&_of->os,_og); - /*Bitrate tracking: add the header's bytes here. - The body bytes are counted when we consume the packets.*/ - _of->bytes_tracked+=_og->header_len; - /*Count the durations of all packets in the page.*/ - do total_duration=op_collect_audio_packets(_of,durations); - /*Ignore holes.*/ - while(OP_UNLIKELY(total_duration<0)); - op_count=_of->op_count; - } - while(op_count<=0); - /*We found the first page with a completed audio data packet: actually look - at the granule position. - RFC 3533 says, "A special value of -1 (in two's complement) indicates that - no packets finish on this page," which does not say that a granule - position that is NOT -1 indicates that some packets DO finish on that page - (even though this was the intention, libogg itself violated this intention - for years before we fixed it). - The Ogg Opus specification only imposes its start-time requirements - on the granule position of the first page with completed packets, - so we ignore any set granule positions until then.*/ - cur_page_gp=_of->op[op_count-1].granulepos; - /*But getting a packet without a valid granule position on the page is not - okay.*/ - if(cur_page_gp==-1)return OP_EBADTIMESTAMP; - cur_page_eos=_of->op[op_count-1].e_o_s; - if(OP_LIKELY(!cur_page_eos)){ - /*The EOS flag wasn't set. - Work backwards from the provided granule position to get the starting PCM - offset.*/ - if(OP_UNLIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){ - /*The starting granule position MUST not be smaller than the amount of - audio on the first page with completed packets.*/ - return OP_EBADTIMESTAMP; - } - } - else{ - /*The first page with completed packets was also the last.*/ - if(OP_LIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){ - /*If there's less audio on the page than indicated by the granule - position, then we're doing end-trimming, and the starting PCM offset - is zero by spec mandate.*/ - pcm_start=0; - /*However, the end-trimming MUST not ask us to trim more samples than - exist after applying the pre-skip.*/ - if(OP_UNLIKELY(op_granpos_cmp(cur_page_gp,_link->head.pre_skip)<0)){ - return OP_EBADTIMESTAMP; - } - } - } - /*Timestamp the individual packets.*/ - prev_packet_gp=pcm_start; - for(pi=0;pi0){ - /*If we trimmed the entire packet, stop (the spec says encoders - shouldn't do this, but we support it anyway).*/ - if(OP_UNLIKELY(diff>durations[pi]))break; - _of->op[pi].granulepos=prev_packet_gp=cur_page_gp; - /*Move the EOS flag to this packet, if necessary, so we'll trim the - samples.*/ - _of->op[pi].e_o_s=1; - continue; - } - } - /*Update the granule position as normal.*/ - OP_ALWAYS_TRUE(!op_granpos_add(&_of->op[pi].granulepos, - prev_packet_gp,durations[pi])); - prev_packet_gp=_of->op[pi].granulepos; - } - /*Update the packet count after end-trimming.*/ - _of->op_count=pi; - _of->cur_discard_count=_link->head.pre_skip; - _of->prev_packet_gp=_link->pcm_start=pcm_start; - _of->prev_page_offset=page_offset; - return 0; -} - -/*Starting from current cursor position, get the final PCM offset of the - previous page. - This also validates the duration of the link, which, while not strictly - required by the spec, we need to ensure duration calculations don't - overflow. - This is only done for seekable sources. - We must validate that op_find_initial_pcm_offset() succeeded for this link - before calling this function, otherwise it will scan the entire stream - backwards until it reaches the start, and then fail.*/ -static int op_find_final_pcm_offset(OggOpusFile *_of, - const ogg_uint32_t *_serialnos,int _nserialnos,OggOpusLink *_link, - opus_int64 _offset,ogg_uint32_t _end_serialno,ogg_int64_t _end_gp, - ogg_int64_t *_total_duration){ - ogg_int64_t total_duration; - ogg_int64_t duration; - ogg_uint32_t cur_serialno; - /*For the time being, fetch end PCM offset the simple way.*/ - cur_serialno=_link->serialno; - if(_end_serialno!=cur_serialno||_end_gp==-1){ - _offset=op_get_last_page(_of,&_end_gp,_offset, - cur_serialno,_serialnos,_nserialnos); - if(OP_UNLIKELY(_offset<0))return (int)_offset; - } - /*At worst we should have found the first page with completed packets.*/ - if(OP_UNLIKELY(_offset<_link->data_offset))return OP_EBADLINK; - /*This implementation requires that the difference between the first and last - granule positions in each link be representable in a signed, 64-bit - number, and that each link also have at least as many samples as the - pre-skip requires.*/ - if(OP_UNLIKELY(op_granpos_diff(&duration,_end_gp,_link->pcm_start)<0) - ||OP_UNLIKELY(duration<_link->head.pre_skip)){ - return OP_EBADTIMESTAMP; - } - /*We also require that the total duration be representable in a signed, - 64-bit number.*/ - duration-=_link->head.pre_skip; - total_duration=*_total_duration; - if(OP_UNLIKELY(OP_INT64_MAX-durationpcm_end=_end_gp; - _link->end_offset=_offset; - return 0; -} - -/*Rescale the number _x from the range [0,_from] to [0,_to]. - _from and _to must be positive.*/ -static opus_int64 op_rescale64(opus_int64 _x,opus_int64 _from,opus_int64 _to){ - opus_int64 frac; - opus_int64 ret; - int i; - if(_x>=_from)return _to; - if(_x<=0)return 0; - frac=0; - for(i=0;i<63;i++){ - frac<<=1; - OP_ASSERT(_x<=_from); - if(_x>=_from>>1){ - _x-=_from-_x; - frac|=1; - } - else _x<<=1; - } - ret=0; - for(i=0;i<63;i++){ - if(frac&1)ret=(ret&_to&1)+(ret>>1)+(_to>>1); - else ret>>=1; - frac>>=1; - } - return ret; -} - -/*The minimum granule position spacing allowed for making predictions. - This corresponds to about 1 second of audio at 48 kHz for both Opus and - Vorbis, or one keyframe interval in Theora with the default keyframe spacing - of 256.*/ -#define OP_GP_SPACING_MIN (48000) - -/*Try to estimate the location of the next link using the current seek - records, assuming the initial granule position of any streams we've found is - 0.*/ -static opus_int64 op_predict_link_start(const OpusSeekRecord *_sr,int _nsr, - opus_int64 _searched,opus_int64 _end_searched,opus_int32 _bias){ - opus_int64 bisect; - int sri; - int srj; - /*Require that we be at least OP_CHUNK_SIZE from the end. - We don't require that we be at least OP_CHUNK_SIZE from the beginning, - because if we are we'll just scan forward without seeking.*/ - _end_searched-=OP_CHUNK_SIZE; - if(_searched>=_end_searched)return -1; - bisect=_end_searched; - for(sri=0;sri<_nsr;sri++){ - ogg_int64_t gp1; - ogg_int64_t gp2_min; - ogg_uint32_t serialno1; - opus_int64 offset1; - /*If the granule position is negative, either it's invalid or we'd cause - overflow.*/ - gp1=_sr[sri].gp; - if(gp1<0)continue; - /*We require some minimum distance between granule positions to make an - estimate. - We don't actually know what granule position scheme is being used, - because we have no idea what kind of stream these came from. - Therefore we require a minimum spacing between them, with the - expectation that while bitrates and granule position increments might - vary locally in quite complex ways, they are globally smooth.*/ - if(OP_UNLIKELY(op_granpos_add(&gp2_min,gp1,OP_GP_SPACING_MIN)<0)){ - /*No granule position would satisfy us.*/ - continue; - } - offset1=_sr[sri].offset; - serialno1=_sr[sri].serialno; - for(srj=sri;srj-->0;){ - ogg_int64_t gp2; - opus_int64 offset2; - opus_int64 num; - ogg_int64_t den; - ogg_int64_t ipart; - gp2=_sr[srj].gp; - if(gp20); - if(ipart>0&&(offset2-_searched)/ipart=_end_searched?-1:bisect; -} - -/*Finds each bitstream link, one at a time, using a bisection search. - This has to begin by knowing the offset of the first link's initial page.*/ -static int op_bisect_forward_serialno(OggOpusFile *_of, - opus_int64 _searched,OpusSeekRecord *_sr,int _csr, - ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){ - ogg_page og; - OggOpusLink *links; - int nlinks; - int clinks; - ogg_uint32_t *serialnos; - int nserialnos; - ogg_int64_t total_duration; - int nsr; - int ret; - links=_of->links; - nlinks=clinks=_of->nlinks; - total_duration=0; - /*We start with one seek record, for the last page in the file. - We build up a list of records for places we seek to during link - enumeration. - This list is kept sorted in reverse order. - We only care about seek locations that were _not_ in the current link, - therefore we can add them one at a time to the end of the list as we - improve the lower bound on the location where the next link starts.*/ - nsr=1; - for(;;){ - opus_int64 end_searched; - opus_int64 bisect; - opus_int64 next; - opus_int64 last; - ogg_int64_t end_offset; - ogg_int64_t end_gp; - int sri; - serialnos=*_serialnos; - nserialnos=*_nserialnos; - if(OP_UNLIKELY(nlinks>=clinks)){ - if(OP_UNLIKELY(clinks>INT_MAX-1>>1))return OP_EFAULT; - clinks=2*clinks+1; - OP_ASSERT(nlinkslinks=links; - } - /*Invariants: - We have the headers and serial numbers for the link beginning at 'begin'. - We have the offset and granule position of the last page in the file - (potentially not a page we care about).*/ - /*Scan the seek records we already have to save us some bisection.*/ - for(sri=0;sri1){ - opus_int64 last_offset; - opus_int64 avg_link_size; - opus_int64 upper_limit; - last_offset=links[nlinks-1].offset; - avg_link_size=last_offset/(nlinks-1); - upper_limit=end_searched-OP_CHUNK_SIZE-avg_link_size; - if(OP_LIKELY(last_offset>_searched-avg_link_size) - &&OP_LIKELY(last_offset>1); - /*If we're within OP_CHUNK_SIZE of the start, scan forward.*/ - if(bisect-_searchedoffset-last>=0); - OP_ASSERT(_of->offset-last<=OP_PAGE_SIZE_MAX); - _sr[nsr].size=(opus_int32)(_of->offset-last); - _sr[nsr].serialno=serialno; - _sr[nsr].gp=gp; - nsr++; - } - } - else{ - _searched=_of->offset; - next_bias=OP_CHUNK_SIZE; - if(serialno==links[nlinks-1].serialno){ - /*This page was from the stream we want, remember it. - If it's the last such page in the link, we won't have to go back - looking for it later.*/ - end_gp=gp; - end_offset=last; - } - } - } - bisect=op_predict_link_start(_sr,nsr,_searched,end_searched,next_bias); - } - /*Bisection point found. - Get the final granule position of the previous link, assuming - op_find_initial_pcm_offset() didn't already determine the link was - empty.*/ - if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){ - if(end_gp==-1){ - /*If we don't know where the end page is, we'll have to seek back and - look for it, starting from the end of the link.*/ - end_offset=next; - /*Also forget the last page we read. - It won't be available after the seek.*/ - last=-1; - } - ret=op_find_final_pcm_offset(_of,serialnos,nserialnos, - links+nlinks-1,end_offset,links[nlinks-1].serialno,end_gp, - &total_duration); - if(OP_UNLIKELY(ret<0))return ret; - } - if(last!=next){ - /*The last page we read was not the first page the next link. - Move the cursor position to the offset of that first page. - This only performs an actual seek if the first page of the next link - does not start at the end of the last page from the current Opus - stream with a valid granule position.*/ - ret=op_seek_helper(_of,next); - if(OP_UNLIKELY(ret<0))return ret; - } - ret=op_fetch_headers(_of,&links[nlinks].head,&links[nlinks].tags, - _serialnos,_nserialnos,_cserialnos,last!=next?NULL:&og); - if(OP_UNLIKELY(ret<0))return ret; - links[nlinks].offset=next; - links[nlinks].data_offset=_of->offset; - links[nlinks].serialno=_of->os.serialno; - links[nlinks].pcm_end=-1; - /*This might consume a page from the next link, however the next bisection - always starts with a seek.*/ - ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL); - if(OP_UNLIKELY(ret<0))return ret; - _searched=_of->offset; - /*Mark the current link count so it can be cleaned up on error.*/ - _of->nlinks=++nlinks; - } - /*Last page is in the starting serialno list, so we've reached the last link. - Now find the last granule position for it (if we didn't the first time we - looked at the end of the stream, and if op_find_initial_pcm_offset() - didn't already determine the link was empty).*/ - if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){ - ret=op_find_final_pcm_offset(_of,serialnos,nserialnos, - links+nlinks-1,_sr[0].offset,_sr[0].serialno,_sr[0].gp,&total_duration); - if(OP_UNLIKELY(ret<0))return ret; - } - /*Trim back the links array if necessary.*/ - links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*nlinks); - if(OP_LIKELY(links!=NULL))_of->links=links; - /*We also don't need these anymore.*/ - _ogg_free(*_serialnos); - *_serialnos=NULL; - *_cserialnos=*_nserialnos=0; - return 0; -} - -static void op_update_gain(OggOpusFile *_of){ - OpusHead *head; - opus_int32 gain_q8; - int li; - /*If decode isn't ready, then we'll apply the gain when we initialize the - decoder.*/ - if(_of->ready_stategain_offset_q8; - li=_of->seekable?_of->cur_link:0; - head=&_of->links[li].head; - /*We don't have to worry about overflow here because the header gain and - track gain must lie in the range [-32768,32767], and the user-supplied - offset has been pre-clamped to [-98302,98303].*/ - switch(_of->gain_type){ - case OP_ALBUM_GAIN:{ - int album_gain_q8; - album_gain_q8=0; - opus_tags_get_album_gain(&_of->links[li].tags,&album_gain_q8); - gain_q8+=album_gain_q8; - gain_q8+=head->output_gain; - }break; - case OP_TRACK_GAIN:{ - int track_gain_q8; - track_gain_q8=0; - opus_tags_get_track_gain(&_of->links[li].tags,&track_gain_q8); - gain_q8+=track_gain_q8; - gain_q8+=head->output_gain; - }break; - case OP_HEADER_GAIN:gain_q8+=head->output_gain;break; - case OP_ABSOLUTE_GAIN:break; - default:OP_ASSERT(0); - } - gain_q8=OP_CLAMP(-32768,gain_q8,32767); - OP_ASSERT(_of->od!=NULL); -#if defined(OPUS_SET_GAIN) - opus_multistream_decoder_ctl(_of->od,OPUS_SET_GAIN(gain_q8)); -#else -/*A fallback that works with both float and fixed-point is a bunch of work, - so just force people to use a sufficiently new version. - This is deployed well enough at this point that this shouldn't be a burden.*/ -# error "libopus 1.0.1 or later required" -#endif -} - -static int op_make_decode_ready(OggOpusFile *_of){ - const OpusHead *head; - int li; - int stream_count; - int coupled_count; - int channel_count; - if(_of->ready_state>OP_STREAMSET)return 0; - if(OP_UNLIKELY(_of->ready_stateseekable?_of->cur_link:0; - head=&_of->links[li].head; - stream_count=head->stream_count; - coupled_count=head->coupled_count; - channel_count=head->channel_count; - /*Check to see if the current decoder is compatible with the current link.*/ - if(_of->od!=NULL&&_of->od_stream_count==stream_count - &&_of->od_coupled_count==coupled_count&&_of->od_channel_count==channel_count - &&memcmp(_of->od_mapping,head->mapping, - sizeof(*head->mapping)*channel_count)==0){ - opus_multistream_decoder_ctl(_of->od,OPUS_RESET_STATE); - } - else{ - int err; - opus_multistream_decoder_destroy(_of->od); - _of->od=opus_multistream_decoder_create(48000,channel_count, - stream_count,coupled_count,head->mapping,&err); - if(_of->od==NULL)return OP_EFAULT; - _of->od_stream_count=stream_count; - _of->od_coupled_count=coupled_count; - _of->od_channel_count=channel_count; - memcpy(_of->od_mapping,head->mapping,sizeof(*head->mapping)*channel_count); - } - _of->ready_state=OP_INITSET; - _of->bytes_tracked=0; - _of->samples_tracked=0; -#if !defined(OP_FIXED_POINT) - _of->state_channel_count=0; - /*Use the serial number for the PRNG seed to get repeatable output for - straight play-throughs.*/ - _of->dither_seed=_of->links[li].serialno; -#endif - op_update_gain(_of); - return 0; -} - -static int op_open_seekable2_impl(OggOpusFile *_of){ - /*64 seek records should be enough for anybody. - Actually, with a bisection search in a 63-bit range down to OP_CHUNK_SIZE - granularity, much more than enough.*/ - OpusSeekRecord sr[64]; - opus_int64 data_offset; - int ret; - /*We can seek, so set out learning all about this file.*/ - (*_of->callbacks.seek)(_of->source,0,SEEK_END); - _of->offset=_of->end=(*_of->callbacks.tell)(_of->source); - if(OP_UNLIKELY(_of->end<0))return OP_EREAD; - data_offset=_of->links[0].data_offset; - if(OP_UNLIKELY(_of->endend, - _of->links[0].serialno,_of->serialnos,_of->nserialnos); - if(OP_UNLIKELY(ret<0))return ret; - /*If there's any trailing junk, forget about it.*/ - _of->end=sr[0].offset+sr[0].size; - if(OP_UNLIKELY(_of->endserialnos,&_of->nserialnos,&_of->cserialnos); -} - -static int op_open_seekable2(OggOpusFile *_of){ - ogg_sync_state oy_start; - ogg_stream_state os_start; - ogg_packet *op_start; - opus_int64 prev_page_offset; - opus_int64 start_offset; - int start_op_count; - int ret; - /*We're partially open and have a first link header state in storage in _of. - Save off that stream state so we can come back to it. - It would be simpler to just dump all this state and seek back to - links[0].data_offset when we're done. - But we do the extra work to allow us to seek back to _exactly_ the same - stream position we're at now. - This allows, e.g., the HTTP backend to continue reading from the original - connection (if it's still available), instead of opening a new one. - This means we can open and start playing a normal Opus file with a single - link and reasonable packet sizes using only two HTTP requests.*/ - start_op_count=_of->op_count; - /*This is a bit too large to put on the stack unconditionally.*/ - op_start=(ogg_packet *)_ogg_malloc(sizeof(*op_start)*start_op_count); - if(op_start==NULL)return OP_EFAULT; - *&oy_start=_of->oy; - *&os_start=_of->os; - prev_page_offset=_of->prev_page_offset; - start_offset=_of->offset; - memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count); - OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of)); - ogg_sync_init(&_of->oy); - ogg_stream_init(&_of->os,-1); - ret=op_open_seekable2_impl(_of); - /*Restore the old stream state.*/ - ogg_stream_clear(&_of->os); - ogg_sync_clear(&_of->oy); - *&_of->oy=*&oy_start; - *&_of->os=*&os_start; - _of->offset=start_offset; - _of->op_count=start_op_count; - memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count); - _ogg_free(op_start); - _of->prev_packet_gp=_of->links[0].pcm_start; - _of->prev_page_offset=prev_page_offset; - _of->cur_discard_count=_of->links[0].head.pre_skip; - if(OP_UNLIKELY(ret<0))return ret; - /*And restore the position indicator.*/ - ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET); - return OP_UNLIKELY(ret<0)?OP_EREAD:0; -} - -/*Clear out the current logical bitstream decoder.*/ -static void op_decode_clear(OggOpusFile *_of){ - /*We don't actually free the decoder. - We might be able to re-use it for the next link.*/ - _of->op_count=0; - _of->od_buffer_size=0; - _of->prev_packet_gp=-1; - _of->prev_page_offset=-1; - if(!_of->seekable){ - OP_ASSERT(_of->ready_state>=OP_INITSET); - opus_tags_clear(&_of->links[0].tags); - } - _of->ready_state=OP_OPENED; -} - -static void op_clear(OggOpusFile *_of){ - OggOpusLink *links; - _ogg_free(_of->od_buffer); - if(_of->od!=NULL)opus_multistream_decoder_destroy(_of->od); - links=_of->links; - if(!_of->seekable){ - if(_of->ready_state>OP_OPENED||_of->ready_state==OP_PARTOPEN){ - opus_tags_clear(&links[0].tags); - } - } - else if(OP_LIKELY(links!=NULL)){ - int nlinks; - int link; - nlinks=_of->nlinks; - for(link=0;linkserialnos); - ogg_stream_clear(&_of->os); - ogg_sync_clear(&_of->oy); - if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source); -} - -static int op_open1(OggOpusFile *_of, - void *_source,const OpusFileCallbacks *_cb, - const unsigned char *_initial_data,size_t _initial_bytes){ - ogg_page og; - ogg_page *pog; - int seekable; - int ret; - memset(_of,0,sizeof(*_of)); - _of->end=-1; - _of->source=_source; - *&_of->callbacks=*_cb; - /*At a minimum, we need to be able to read data.*/ - if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD; - /*Initialize the framing state.*/ - ogg_sync_init(&_of->oy); - /*Perhaps some data was previously read into a buffer for testing against - other stream types. - Allow initialization from this previously read data (especially as we may - be reading from a non-seekable stream). - This requires copying it into a buffer allocated by ogg_sync_buffer() and - doesn't support seeking, so this is not a good mechanism to use for - decoding entire files from RAM.*/ - if(_initial_bytes>0){ - char *buffer; - buffer=ogg_sync_buffer(&_of->oy,_initial_bytes); - memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer)); - ogg_sync_wrote(&_of->oy,_initial_bytes); - } - /*Can we seek? - Stevens suggests the seek test is portable.*/ - seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1; - /*If seek is implemented, tell must also be implemented.*/ - if(seekable){ - opus_int64 pos; - if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL; - pos=(*_of->callbacks.tell)(_of->source); - /*If the current position is not equal to the initial bytes consumed, - absolute seeking will not work.*/ - if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL; - } - _of->seekable=seekable; - /*Don't seek yet. - Set up a 'single' (current) logical bitstream entry for partial open.*/ - _of->links=(OggOpusLink *)_ogg_malloc(sizeof(*_of->links)); - /*The serialno gets filled in later by op_fetch_headers().*/ - ogg_stream_init(&_of->os,-1); - pog=NULL; - for(;;){ - /*Fetch all BOS pages, store the Opus header and all seen serial numbers, - and load subsequent Opus setup headers.*/ - ret=op_fetch_headers(_of,&_of->links[0].head,&_of->links[0].tags, - &_of->serialnos,&_of->nserialnos,&_of->cserialnos,pog); - if(OP_UNLIKELY(ret<0))break; - _of->nlinks=1; - _of->links[0].offset=0; - _of->links[0].data_offset=_of->offset; - _of->links[0].pcm_end=-1; - _of->links[0].serialno=_of->os.serialno; - /*Fetch the initial PCM offset.*/ - ret=op_find_initial_pcm_offset(_of,_of->links,&og); - if(seekable||OP_LIKELY(ret<=0))break; - /*This link was empty, but we already have the BOS page for the next one in - og. - We can't seek, so start processing the next link right now.*/ - opus_tags_clear(&_of->links[0].tags); - _of->nlinks=0; - if(!seekable)_of->cur_link++; - pog=&og; - } - if(OP_LIKELY(ret>=0))_of->ready_state=OP_PARTOPEN; - return ret; -} - -static int op_open2(OggOpusFile *_of){ - int ret; - OP_ASSERT(_of->ready_state==OP_PARTOPEN); - if(_of->seekable){ - _of->ready_state=OP_OPENED; - ret=op_open_seekable2(_of); - } - else ret=0; - if(OP_LIKELY(ret>=0)){ - /*We have buffered packets from op_find_initial_pcm_offset(). - Move to OP_INITSET so we can use them.*/ - _of->ready_state=OP_STREAMSET; - ret=op_make_decode_ready(_of); - if(OP_LIKELY(ret>=0))return 0; - } - /*Don't auto-close the stream on failure.*/ - _of->callbacks.close=NULL; - op_clear(_of); - return ret; -} - -OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb, - const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ - OggOpusFile *of; - int ret; - of=(OggOpusFile *)_ogg_malloc(sizeof(*of)); - ret=OP_EFAULT; - if(OP_LIKELY(of!=NULL)){ - ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes); - if(OP_LIKELY(ret>=0)){ - if(_error!=NULL)*_error=0; - return of; - } - /*Don't auto-close the stream on failure.*/ - of->callbacks.close=NULL; - op_clear(of); - _ogg_free(of); - } - if(_error!=NULL)*_error=ret; - return NULL; -} - -OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb, - const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ - OggOpusFile *of; - of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error); - if(OP_LIKELY(of!=NULL)){ - int ret; - ret=op_open2(of); - if(OP_LIKELY(ret>=0))return of; - if(_error!=NULL)*_error=ret; - _ogg_free(of); - } - return NULL; -} - -/*Convenience routine to clean up from failure for the open functions that - create their own streams.*/ -static OggOpusFile *op_open_close_on_failure(void *_source, - const OpusFileCallbacks *_cb,int *_error){ - OggOpusFile *of; - if(OP_UNLIKELY(_source==NULL)){ - if(_error!=NULL)*_error=OP_EFAULT; - return NULL; - } - of=op_open_callbacks(_source,_cb,NULL,0,_error); - if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); - return of; -} - -OggOpusFile *op_open_file(const char *_path,int *_error){ - OpusFileCallbacks cb; - return op_open_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error); -} - -OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size, - int *_error){ - OpusFileCallbacks cb; - return op_open_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb, - _error); -} - -/*Convenience routine to clean up from failure for the open functions that - create their own streams.*/ -static OggOpusFile *op_test_close_on_failure(void *_source, - const OpusFileCallbacks *_cb,int *_error){ - OggOpusFile *of; - if(OP_UNLIKELY(_source==NULL)){ - if(_error!=NULL)*_error=OP_EFAULT; - return NULL; - } - of=op_test_callbacks(_source,_cb,NULL,0,_error); - if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); - return of; -} - -OggOpusFile *op_test_file(const char *_path,int *_error){ - OpusFileCallbacks cb; - return op_test_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error); -} - -OggOpusFile *op_test_memory(const unsigned char *_data,size_t _size, - int *_error){ - OpusFileCallbacks cb; - return op_test_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb, - _error); -} - -int op_test_open(OggOpusFile *_of){ - int ret; - if(OP_UNLIKELY(_of->ready_state!=OP_PARTOPEN))return OP_EINVAL; - ret=op_open2(_of); - /*op_open2() will clear this structure on failure. - Reset its contents to prevent double-frees in op_free().*/ - if(OP_UNLIKELY(ret<0))memset(_of,0,sizeof(*_of)); - return ret; -} - -void op_free(OggOpusFile *_of){ - if(OP_LIKELY(_of!=NULL)){ - op_clear(_of); - _ogg_free(_of); - } -} - -int op_seekable(const OggOpusFile *_of){ - return _of->seekable; -} - -int op_link_count(const OggOpusFile *_of){ - return _of->nlinks; -} - -ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; - if(!_of->seekable)_li=0; - return _of->links[_li<0?_of->cur_link:_li].serialno; -} - -int op_channel_count(const OggOpusFile *_of,int _li){ - return op_head(_of,_li)->channel_count; -} - -opus_int64 op_raw_total(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_of->ready_stateseekable) - ||OP_UNLIKELY(_li>=_of->nlinks)){ - return OP_EINVAL; - } - if(_li<0)return _of->end-_of->links[0].offset; - return (_li+1>=_of->nlinks?_of->end:_of->links[_li+1].offset) - -_of->links[_li].offset; -} - -ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){ - OggOpusLink *links; - ogg_int64_t diff; - int nlinks; - nlinks=_of->nlinks; - if(OP_UNLIKELY(_of->ready_stateseekable) - ||OP_UNLIKELY(_li>=nlinks)){ - return OP_EINVAL; - } - links=_of->links; - /*We verify that the granule position differences are larger than the - pre-skip and that the total duration does not overflow during link - enumeration, so we don't have to check here.*/ - if(_li<0){ - ogg_int64_t pcm_total; - int li; - pcm_total=0; - for(li=0;li=_of->nlinks))_li=_of->nlinks-1; - if(!_of->seekable)_li=0; - return &_of->links[_li<0?_of->cur_link:_li].head; -} - -const OpusTags *op_tags(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; - if(!_of->seekable){ - if(_of->ready_stateready_state!=OP_PARTOPEN){ - return NULL; - } - _li=0; - } - else if(_li<0)_li=_of->ready_state>=OP_STREAMSET?_of->cur_link:0; - return &_of->links[_li].tags; -} - -int op_current_link(const OggOpusFile *_of){ - if(OP_UNLIKELY(_of->ready_statecur_link; -} - -/*Compute an average bitrate given a byte and sample count. - Return: The bitrate in bits per second.*/ -static opus_int32 op_calc_bitrate(opus_int64 _bytes,ogg_int64_t _samples){ - /*These rates are absurd, but let's handle them anyway.*/ - if(OP_UNLIKELY(_bytes>(OP_INT64_MAX-(_samples>>1))/(48000*8))){ - ogg_int64_t den; - if(OP_UNLIKELY(_bytes/(OP_INT32_MAX/(48000*8))>=_samples)){ - return OP_INT32_MAX; - } - den=_samples/(48000*8); - return (opus_int32)((_bytes+(den>>1))/den); - } - if(OP_UNLIKELY(_samples<=0))return OP_INT32_MAX; - /*This can't actually overflow in normal operation: even with a pre-skip of - 545 2.5 ms frames with 8 streams running at 1282*8+1 bytes per packet - (1275 byte frames + Opus framing overhead + Ogg lacing values), that all - produce a single sample of decoded output, we still don't top 45 Mbps. - The only way to get bitrates larger than that is with excessive Opus - padding, more encoded streams than output channels, or lots and lots of - Ogg pages with no packets on them.*/ - return (opus_int32)OP_MIN((_bytes*48000*8+(_samples>>1))/_samples, - OP_INT32_MAX); -} - -opus_int32 op_bitrate(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_of->ready_stateseekable) - ||OP_UNLIKELY(_li>=_of->nlinks)){ - return OP_EINVAL; - } - return op_calc_bitrate(op_raw_total(_of,_li),op_pcm_total(_of,_li)); -} - -opus_int32 op_bitrate_instant(OggOpusFile *_of){ - ogg_int64_t samples_tracked; - opus_int32 ret; - if(OP_UNLIKELY(_of->ready_statesamples_tracked; - if(OP_UNLIKELY(samples_tracked==0))return OP_FALSE; - ret=op_calc_bitrate(_of->bytes_tracked,samples_tracked); - _of->bytes_tracked=0; - _of->samples_tracked=0; - return ret; -} - -/*Fetch and process a page. - This handles the case where we're at a bitstream boundary and dumps the - decoding machine. - If the decoding machine is unloaded, it loads it. - It also keeps prev_packet_gp up to date (seek and read both use this). - Return: <0) Error, OP_HOLE (lost packet), or OP_EOF. - 0) Got at least one audio data packet.*/ -static int op_fetch_and_process_page(OggOpusFile *_of, - ogg_page *_og,opus_int64 _page_offset,int _spanp,int _ignore_holes){ - OggOpusLink *links; - ogg_uint32_t cur_serialno; - int seekable; - int cur_link; - int ret; - /*We shouldn't get here if we have unprocessed packets.*/ - OP_ASSERT(_of->ready_stateop_pos>=_of->op_count); - seekable=_of->seekable; - links=_of->links; - cur_link=seekable?_of->cur_link:0; - cur_serialno=links[cur_link].serialno; - /*Handle one page.*/ - for(;;){ - ogg_page og; - OP_ASSERT(_of->ready_state>=OP_OPENED); - /*If we were given a page to use, use it.*/ - if(_og!=NULL){ - *&og=*_og; - _og=NULL; - } - /*Keep reading until we get a page with the correct serialno.*/ - else _page_offset=op_get_next_page(_of,&og,_of->end); - /*EOF: Leave uninitialized.*/ - if(_page_offset<0)return _page_offsetready_state>=OP_STREAMSET) - &&cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){ - /*Two possibilities: - 1) Another stream is multiplexed into this logical section, or*/ - if(OP_LIKELY(!ogg_page_bos(&og)))continue; - /* 2) Our decoding just traversed a bitstream boundary.*/ - if(!_spanp)return OP_EOF; - if(OP_LIKELY(_of->ready_state>=OP_INITSET))op_decode_clear(_of); - } - /*Bitrate tracking: add the header's bytes here. - The body bytes are counted when we consume the packets.*/ - else _of->bytes_tracked+=og.header_len; - /*Do we need to load a new machine before submitting the page? - This is different in the seekable and non-seekable cases. - In the seekable case, we already have all the header information loaded - and cached. - We just initialize the machine with it and continue on our merry way. - In the non-seekable (streaming) case, we'll only be at a boundary if we - just left the previous logical bitstream, and we're now nominally at the - header of the next bitstream.*/ - if(OP_UNLIKELY(_of->ready_statenlinks; - for(li=0;li=nlinks)continue; - cur_serialno=serialno; - _of->cur_link=cur_link=li; - ogg_stream_reset_serialno(&_of->os,serialno); - _of->ready_state=OP_STREAMSET; - /*If we're at the start of this link, initialize the granule position - and pre-skip tracking.*/ - if(_page_offset<=links[cur_link].data_offset){ - _of->prev_packet_gp=links[cur_link].pcm_start; - _of->prev_page_offset=-1; - _of->cur_discard_count=links[cur_link].head.pre_skip; - /*Ignore a hole at the start of a new link (this is common for - streams joined in the middle) or after seeking.*/ - _ignore_holes=1; - } - } - else{ - do{ - /*We're streaming. - Fetch the two header packets, build the info struct.*/ - ret=op_fetch_headers(_of,&links[0].head,&links[0].tags, - NULL,NULL,NULL,&og); - if(OP_UNLIKELY(ret<0))return ret; - /*op_find_initial_pcm_offset() will suppress any initial hole for us, - so no need to set _ignore_holes.*/ - ret=op_find_initial_pcm_offset(_of,links,&og); - if(OP_UNLIKELY(ret<0))return ret; - _of->links[0].serialno=cur_serialno=_of->os.serialno; - _of->cur_link++; - } - /*If the link was empty, keep going, because we already have the - BOS page of the next one in og.*/ - while(OP_UNLIKELY(ret>0)); - /*If we didn't get any packets out of op_find_initial_pcm_offset(), - keep going (this is possible if end-trimming trimmed them all).*/ - if(_of->op_count<=0)continue; - /*Otherwise, we're done. - TODO: This resets bytes_tracked, which misses the header bytes - already processed by op_find_initial_pcm_offset().*/ - ret=op_make_decode_ready(_of); - if(OP_UNLIKELY(ret<0))return ret; - return 0; - } - } - /*The buffered page is the data we want, and we're ready for it. - Add it to the stream state.*/ - if(OP_UNLIKELY(_of->ready_state==OP_STREAMSET)){ - ret=op_make_decode_ready(_of); - if(OP_UNLIKELY(ret<0))return ret; - } - /*Extract all the packets from the current page.*/ - ogg_stream_pagein(&_of->os,&og); - if(OP_LIKELY(_of->ready_state>=OP_INITSET)){ - opus_int32 total_duration; - int durations[255]; - int op_count; - total_duration=op_collect_audio_packets(_of,durations); - if(OP_UNLIKELY(total_duration<0)){ - /*Drain the packets from the page anyway.*/ - total_duration=op_collect_audio_packets(_of,durations); - OP_ASSERT(total_duration>=0); - /*Report holes to the caller.*/ - if(!_ignore_holes)return OP_HOLE; - } - op_count=_of->op_count; - /*If we found at least one audio data packet, compute per-packet granule - positions for them.*/ - if(op_count>0){ - ogg_int64_t diff; - ogg_int64_t prev_packet_gp; - ogg_int64_t cur_packet_gp; - ogg_int64_t cur_page_gp; - int cur_page_eos; - int pi; - cur_page_gp=_of->op[op_count-1].granulepos; - cur_page_eos=_of->op[op_count-1].e_o_s; - prev_packet_gp=_of->prev_packet_gp; - if(OP_UNLIKELY(prev_packet_gp==-1)){ - opus_int32 cur_discard_count; - /*This is the first call after a raw seek. - Try to reconstruct prev_packet_gp from scratch.*/ - OP_ASSERT(seekable); - if(OP_UNLIKELY(cur_page_eos)){ - /*If the first page we hit after our seek was the EOS page, and - we didn't start from data_offset or before, we don't have - enough information to do end-trimming. - Proceed to the next link, rather than risk playing back some - samples that shouldn't have been played.*/ - _of->op_count=0; - continue; - } - /*By default discard 80 ms of data after a seek, unless we seek - into the pre-skip region.*/ - cur_discard_count=80*48; - cur_page_gp=_of->op[op_count-1].granulepos; - /*Try to initialize prev_packet_gp. - If the current page had packets but didn't have a granule - position, or the granule position it had was too small (both - illegal), just use the starting granule position for the link.*/ - prev_packet_gp=links[cur_link].pcm_start; - if(OP_LIKELY(cur_page_gp!=-1)){ - op_granpos_add(&prev_packet_gp,cur_page_gp,-total_duration); - } - if(OP_LIKELY(!op_granpos_diff(&diff, - prev_packet_gp,links[cur_link].pcm_start))){ - opus_int32 pre_skip; - /*If we start at the beginning of the pre-skip region, or we're - at least 80 ms from the end of the pre-skip region, we discard - to the end of the pre-skip region. - Otherwise, we still use the 80 ms default, which will discard - past the end of the pre-skip region.*/ - pre_skip=links[cur_link].head.pre_skip; - if(diff>=0&&diff<=OP_MAX(0,pre_skip-80*48)){ - cur_discard_count=pre_skip-(int)diff; - } - } - _of->cur_discard_count=cur_discard_count; - } - if(OP_UNLIKELY(cur_page_gp==-1)){ - /*This page had completed packets but didn't have a valid granule - position. - This is illegal, but we'll try to handle it by continuing to count - forwards from the previous page.*/ - if(op_granpos_add(&cur_page_gp,prev_packet_gp,total_duration)<0){ - /*The timestamp for this page overflowed.*/ - cur_page_gp=links[cur_link].pcm_end; - } - } - /*If we hit the last page, handle end-trimming.*/ - if(OP_UNLIKELY(cur_page_eos) - &&OP_LIKELY(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp)) - &&OP_LIKELY(diff0){ - /*If we trimmed the entire packet, stop (the spec says encoders - shouldn't do this, but we support it anyway).*/ - if(OP_UNLIKELY(diff>durations[pi]))break; - cur_packet_gp=cur_page_gp; - /*Move the EOS flag to this packet, if necessary, so we'll trim - the samples during decode.*/ - _of->op[pi].e_o_s=1; - } - else{ - /*Update the granule position as normal.*/ - OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp, - cur_packet_gp,durations[pi])); - } - _of->op[pi].granulepos=cur_packet_gp; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,cur_packet_gp)); - } - } - else{ - /*Propagate timestamps to earlier packets. - op_granpos_add(&prev_packet_gp,prev_packet_gp,total_duration) - should succeed and give prev_packet_gp==cur_page_gp. - But we don't bother to check that, as there isn't much we can do - if it's not true, and it actually will not be true on the first - page after a seek, if there was a continued packet. - The only thing we guarantee is that the start and end granule - positions of the packets are valid, and that they are monotonic - within a page. - They might be completely out of range for this link (we'll check - that elsewhere), or non-monotonic between pages.*/ - if(OP_UNLIKELY(op_granpos_add(&prev_packet_gp, - cur_page_gp,-total_duration)<0)){ - /*The starting timestamp for the first packet on this page - underflowed. - This is illegal, but we ignore it.*/ - prev_packet_gp=0; - } - for(pi=0;pi=0); - OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp, - cur_packet_gp,durations[pi])); - _of->op[pi].granulepos=cur_packet_gp; - } - OP_ASSERT(total_duration==0); - } - _of->prev_packet_gp=prev_packet_gp; - _of->prev_page_offset=_page_offset; - _of->op_count=pi; - /*If end-trimming didn't trim all the packets, we're done.*/ - if(OP_LIKELY(pi>0))return 0; - } - } - } -} - -int op_raw_seek(OggOpusFile *_of,opus_int64 _pos){ - int ret; - if(OP_UNLIKELY(_of->ready_stateseekable))return OP_ENOSEEK; - if(OP_UNLIKELY(_pos<0)||OP_UNLIKELY(_pos>_of->end))return OP_EINVAL; - /*Clear out any buffered, decoded data.*/ - op_decode_clear(_of); - _of->bytes_tracked=0; - _of->samples_tracked=0; - ret=op_seek_helper(_of,_pos); - if(OP_UNLIKELY(ret<0))return OP_EREAD; - ret=op_fetch_and_process_page(_of,NULL,-1,1,1); - /*If we hit EOF, op_fetch_and_process_page() leaves us uninitialized. - Instead, jump to the end.*/ - if(ret==OP_EOF){ - int cur_link; - op_decode_clear(_of); - cur_link=_of->nlinks-1; - _of->cur_link=cur_link; - _of->prev_packet_gp=_of->links[cur_link].pcm_end; - _of->cur_discard_count=0; - ret=0; - } - return ret; -} - -/*Convert a PCM offset relative to the start of the whole stream to a granule - position in an individual link.*/ -static ogg_int64_t op_get_granulepos(const OggOpusFile *_of, - ogg_int64_t _pcm_offset,int *_li){ - const OggOpusLink *links; - ogg_int64_t duration; - int nlinks; - int li; - OP_ASSERT(_pcm_offset>=0); - nlinks=_of->nlinks; - links=_of->links; - for(li=0;OP_LIKELY(liOP_INT64_MAX-_pcm_offset)){ - /*Adding this amount to the granule position would overflow the positive - half of its 64-bit range. - Since signed overflow is undefined in C, do it in a way the compiler - isn't allowed to screw up.*/ - _pcm_offset-=OP_INT64_MAX-pcm_start+1; - pcm_start=OP_INT64_MIN; - } - pcm_start+=_pcm_offset; - *_li=li; - return pcm_start; - } - _pcm_offset-=duration; - } - return -1; -} - -/*A small helper to determine if an Ogg page contains data that continues onto - a subsequent page.*/ -static int op_page_continues(const ogg_page *_og){ - int nlacing; - OP_ASSERT(_og->header_len>=27); - nlacing=_og->header[26]; - OP_ASSERT(_og->header_len>=27+nlacing); - /*This also correctly handles the (unlikely) case of nlacing==0, because - 0!=255.*/ - return _og->header[27+nlacing-1]==255; -} - -/*A small helper to buffer the continued packet data from a page.*/ -static void op_buffer_continued_data(OggOpusFile *_of,ogg_page *_og){ - ogg_packet op; - ogg_stream_pagein(&_of->os,_og); - /*Drain any packets that did end on this page (and ignore holes). - We only care about the continued packet data.*/ - while(ogg_stream_packetout(&_of->os,&op)); -} - -/*This controls how close the target has to be to use the current stream - position to subdivide the initial range. - Two minutes seems to be a good default.*/ -#define OP_CUR_TIME_THRESH (120*48*(opus_int32)1000) - -/*Note: The OP_SMALL_FOOTPRINT #define doesn't (currently) save much code size, - but it's meant to serve as documentation for portions of the seeking - algorithm that are purely optional, to aid others learning from/porting this - code to other contexts.*/ -/*#define OP_SMALL_FOOTPRINT (1)*/ - -/*Search within link _li for the page with the highest granule position - preceding (or equal to) _target_gp. - There is a danger here: missing pages or incorrect frame number information - in the bitstream could make our task impossible. - Account for that (and report it as an error condition).*/ -static int op_pcm_seek_page(OggOpusFile *_of, - ogg_int64_t _target_gp,int _li){ - const OggOpusLink *link; - ogg_page og; - ogg_int64_t pcm_pre_skip; - ogg_int64_t pcm_start; - ogg_int64_t pcm_end; - ogg_int64_t best_gp; - ogg_int64_t diff; - ogg_uint32_t serialno; - opus_int32 pre_skip; - opus_int64 begin; - opus_int64 end; - opus_int64 boundary; - opus_int64 best; - opus_int64 best_start; - opus_int64 page_offset; - opus_int64 d0; - opus_int64 d1; - opus_int64 d2; - int force_bisect; - int buffering; - int ret; - _of->bytes_tracked=0; - _of->samples_tracked=0; - link=_of->links+_li; - best_gp=pcm_start=link->pcm_start; - pcm_end=link->pcm_end; - serialno=link->serialno; - best=best_start=begin=link->data_offset; - page_offset=-1; - buffering=0; - /*We discard the first 80 ms of data after a seek, so seek back that much - farther. - If we can't, simply seek to the beginning of the link.*/ - if(OP_UNLIKELY(op_granpos_add(&_target_gp,_target_gp,-80*48)<0) - ||OP_UNLIKELY(op_granpos_cmp(_target_gp,pcm_start)<0)){ - _target_gp=pcm_start; - } - /*Special case seeking to the start of the link.*/ - pre_skip=link->head.pre_skip; - OP_ALWAYS_TRUE(!op_granpos_add(&pcm_pre_skip,pcm_start,pre_skip)); - if(op_granpos_cmp(_target_gp,pcm_pre_skip)<0)end=boundary=begin; - else{ - end=boundary=link->end_offset; -#if !defined(OP_SMALL_FOOTPRINT) - /*If we were decoding from this link, we can narrow the range a bit.*/ - if(_li==_of->cur_link&&_of->ready_state>=OP_INITSET){ - opus_int64 offset; - int op_count; - op_count=_of->op_count; - /*The only way the offset can be invalid _and_ we can fail the granule - position checks below is if someone changed the contents of the last - page since we read it. - We'd be within our rights to just return OP_EBADLINK in that case, but - we'll simply ignore the current position instead.*/ - offset=_of->offset; - if(op_count>0&&OP_LIKELY(offset<=end)){ - ogg_int64_t gp; - /*Make sure the timestamp is valid. - The granule position might be -1 if we collected the packets from a - page without a granule position after reporting a hole.*/ - gp=_of->op[op_count-1].granulepos; - if(OP_LIKELY(gp!=-1)&&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<0) - &&OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)){ - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,gp,_target_gp)); - /*We only actually use the current time if either - a) We can cut off at least half the range, or - b) We're seeking sufficiently close to the current position that - it's likely to be informative. - Otherwise it appears using the whole link range to estimate the - first seek location gives better results, on average.*/ - if(diff<0){ - OP_ASSERT(offset>=begin); - if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){ - best=begin=offset; - best_gp=pcm_start=gp; - /*If we have buffered data from a continued packet, remember the - offset of the previous page's start, so that if we do wind up - having to seek back here later, we can prime the stream with - the continued packet data. - With no continued packet, we remember the end of the page.*/ - best_start=_of->os.body_returned<_of->os.body_fill? - _of->prev_page_offset:best; - /*If there's completed packets and data in the stream state, - prev_page_offset should always be set.*/ - OP_ASSERT(best_start>=0); - /*Buffer any continued packet data starting from here.*/ - buffering=1; - } - } - else{ - ogg_int64_t prev_page_gp; - /*We might get lucky and already have the packet with the target - buffered. - Worth checking. - For very small files (with all of the data in a single page, - generally 1 second or less), we can loop them continuously - without seeking at all.*/ - OP_ALWAYS_TRUE(!op_granpos_add(&prev_page_gp,_of->op[0].granulepos, - -op_get_packet_duration(_of->op[0].packet,_of->op[0].bytes))); - if(op_granpos_cmp(prev_page_gp,_target_gp)<=0){ - /*Don't call op_decode_clear(), because it will dump our - packets.*/ - _of->op_pos=0; - _of->od_buffer_size=0; - _of->prev_packet_gp=prev_page_gp; - /*_of->prev_page_offset already points to the right place.*/ - _of->ready_state=OP_STREAMSET; - return op_make_decode_ready(_of); - } - /*No such luck. - Check if we can cut off at least half the range, though.*/ - if(offset-begin<=end-begin>>1||diffos,serialno); - _of->cur_link=_li; - _of->ready_state=OP_STREAMSET; - /*Initialize the interval size history.*/ - d2=d1=d0=end-begin; - force_bisect=0; - while(begin>1; - d1=d2>>1; - d2=end-begin>>1; - if(force_bisect)bisect=begin+(end-begin>>1); - else{ - ogg_int64_t diff2; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start)); - OP_ALWAYS_TRUE(!op_granpos_diff(&diff2,pcm_end,pcm_start)); - /*Take a (pretty decent) guess.*/ - bisect=begin+op_rescale64(diff,diff2,end-begin)-OP_CHUNK_SIZE; - } - if(bisect-OP_CHUNK_SIZEoffset){ - /*Discard any buffered continued packet data.*/ - if(buffering)ogg_stream_reset(&_of->os); - buffering=0; - page_offset=-1; - ret=op_seek_helper(_of,bisect); - if(OP_UNLIKELY(ret<0))return ret; - } - chunk_size=OP_CHUNK_SIZE; - next_boundary=boundary; - /*Now scan forward and figure out where we landed. - In the ideal case, we will see a page with a granule position at or - before our target, followed by a page with a granule position after our - target (or the end of the search interval). - Then we can just drop out and will have all of the data we need with no - additional seeking. - If we landed too far before, or after, we'll break out and do another - bisection.*/ - while(beginos); - buffering=0; - bisect=OP_MAX(bisect-chunk_size,begin); - ret=op_seek_helper(_of,bisect); - if(OP_UNLIKELY(ret<0))return ret; - /*Bump up the chunk size.*/ - chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); - /*If we did find a page from another stream or without a timestamp, - don't read past it.*/ - boundary=next_boundary; - } - } - else{ - ogg_int64_t gp; - int has_packets; - /*Save the offset of the first page we found after the seek, regardless - of the stream it came from or whether or not it has a timestamp.*/ - next_boundary=OP_MIN(page_offset,next_boundary); - if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue; - has_packets=ogg_page_packets(&og)>0; - /*Force the gp to -1 (as it should be per spec) if no packets end on - this page. - Otherwise we might get confused when we try to pull out a packet - with that timestamp and can't find it.*/ - gp=has_packets?ogg_page_granulepos(&og):-1; - if(gp==-1){ - if(buffering){ - if(OP_LIKELY(!has_packets))ogg_stream_pagein(&_of->os,&og); - else{ - /*If packets did end on this page, but we still didn't have a - valid granule position (in violation of the spec!), stop - buffering continued packet data. - Otherwise we might continue past the packet we actually - wanted.*/ - ogg_stream_reset(&_of->os); - buffering=0; - } - } - continue; - } - if(op_granpos_cmp(gp,_target_gp)<0){ - /*We found a page that ends before our target. - Advance to the raw offset of the next page.*/ - begin=_of->offset; - if(OP_UNLIKELY(op_granpos_cmp(pcm_start,gp)>0) - ||OP_UNLIKELY(op_granpos_cmp(pcm_end,gp)<0)){ - /*Don't let pcm_start get out of range! - That could happen with an invalid timestamp.*/ - break; - } - /*Save the byte offset of the end of the page with this granule - position.*/ - best=best_start=begin; - /*Buffer any data from a continued packet, if necessary. - This avoids the need to seek back here if the next timestamp we - encounter while scanning forward lies after our target.*/ - if(buffering)ogg_stream_reset(&_of->os); - if(op_page_continues(&og)){ - op_buffer_continued_data(_of,&og); - /*If we have a continued packet, remember the offset of this - page's start, so that if we do wind up having to seek back here - later, we can prime the stream with the continued packet data. - With no continued packet, we remember the end of the page.*/ - best_start=page_offset; - } - /*Then force buffering on, so that if a packet starts (but does not - end) on the next page, we still avoid the extra seek back.*/ - buffering=1; - best_gp=pcm_start=gp; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start)); - /*If we're more than a second away from our target, break out and - do another bisection.*/ - if(diff>48000)break; - /*Otherwise, keep scanning forward (do NOT use begin+1).*/ - bisect=begin; - } - else{ - /*We found a page that ends after our target.*/ - /*If we scanned the whole interval before we found it, we're done.*/ - if(bisect<=begin+1)end=begin; - else{ - end=bisect; - /*In later iterations, don't read past the first page we found.*/ - boundary=next_boundary; - /*If we're not making much progress shrinking the interval size, - start forcing straight bisection to limit the worst case.*/ - force_bisect=end-begin>d0*2; - /*Don't let pcm_end get out of range! - That could happen with an invalid timestamp.*/ - if(OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0) - &&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<=0)){ - pcm_end=gp; - } - break; - } - } - } - } - } - /*Found our page.*/ - OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0); - /*Seek, if necessary. - If we were buffering data from a continued packet, we should be able to - continue to scan forward to get the rest of the data (even if - page_offset==-1). - Otherwise, we need to seek back to best_start.*/ - if(!buffering){ - if(best_start!=page_offset){ - page_offset=-1; - ret=op_seek_helper(_of,best_start); - if(OP_UNLIKELY(ret<0))return ret; - } - if(best_startend_offset); - if(OP_UNLIKELY(page_offsetprev_packet_gp=best_gp; - _of->prev_page_offset=best_start; - ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,0,1); - if(OP_UNLIKELY(ret<0))return OP_EBADLINK; - /*Verify result.*/ - if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){ - return OP_EBADLINK; - } - /*Our caller will set cur_discard_count to handle pre-roll.*/ - return 0; -} - -int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){ - const OggOpusLink *link; - ogg_int64_t pcm_start; - ogg_int64_t target_gp; - ogg_int64_t prev_packet_gp; - ogg_int64_t skip; - ogg_int64_t diff; - int op_count; - int op_pos; - int ret; - int li; - if(OP_UNLIKELY(_of->ready_stateseekable))return OP_ENOSEEK; - if(OP_UNLIKELY(_pcm_offset<0))return OP_EINVAL; - target_gp=op_get_granulepos(_of,_pcm_offset,&li); - if(OP_UNLIKELY(target_gp==-1))return OP_EINVAL; - link=_of->links+li; - pcm_start=link->pcm_start; - OP_ALWAYS_TRUE(!op_granpos_diff(&_pcm_offset,target_gp,pcm_start)); -#if !defined(OP_SMALL_FOOTPRINT) - /*For small (90 ms or less) forward seeks within the same link, just decode - forward. - This also optimizes the case of seeking to the current position.*/ - if(li==_of->cur_link&&_of->ready_state>=OP_INITSET){ - ogg_int64_t gp; - gp=_of->prev_packet_gp; - if(OP_LIKELY(gp!=-1)){ - int nbuffered; - nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); - OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); - /*We do _not_ add cur_discard_count to gp. - Otherwise the total amount to discard could grow without bound, and it - would be better just to do a full seek.*/ - if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){ - ogg_int64_t discard_count; - discard_count=_pcm_offset-diff; - /*We use a threshold of 90 ms instead of 80, since 80 ms is the - _minimum_ we would have discarded after a full seek. - Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/ - if(discard_count>=0&&OP_UNLIKELY(discard_count<90*48)){ - _of->cur_discard_count=(opus_int32)discard_count; - return 0; - } - } - } - } -#endif - ret=op_pcm_seek_page(_of,target_gp,li); - if(OP_UNLIKELY(ret<0))return ret; - /*Now skip samples until we actually get to our target.*/ - /*Figure out where we should skip to.*/ - if(_pcm_offset<=link->head.pre_skip)skip=0; - else skip=OP_MAX(_pcm_offset-80*48,0); - OP_ASSERT(_pcm_offset-skip>=0); - OP_ASSERT(_pcm_offset-skipop_count; - prev_packet_gp=_of->prev_packet_gp; - for(op_pos=_of->op_pos;op_posop[op_pos].granulepos; - if(OP_LIKELY(!op_granpos_diff(&diff,cur_packet_gp,pcm_start)) - &&diff>skip){ - break; - } - prev_packet_gp=cur_packet_gp; - } - _of->prev_packet_gp=prev_packet_gp; - _of->op_pos=op_pos; - if(op_posskip)return OP_EBADLINK; - OP_ASSERT(_pcm_offset-diffcur_discard_count=(opus_int32)(_pcm_offset-diff); - return 0; -} - -opus_int64 op_raw_tell(const OggOpusFile *_of){ - if(OP_UNLIKELY(_of->ready_stateoffset; -} - -/*Convert a granule position from a given link to a PCM offset relative to the - start of the whole stream. - For unseekable sources, this gets reset to 0 at the beginning of each link.*/ -static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of, - ogg_int64_t _gp,int _li){ - const OggOpusLink *links; - ogg_int64_t pcm_offset; - ogg_int64_t delta; - int li; - links=_of->links; - pcm_offset=0; - OP_ASSERT(_li<_of->nlinks); - for(li=0;li<_li;li++){ - OP_ALWAYS_TRUE(!op_granpos_diff(&delta, - links[li].pcm_end,links[li].pcm_start)); - delta-=links[li].head.pre_skip; - pcm_offset+=delta; - } - OP_ASSERT(_li>=0); - if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,links[_li].pcm_end)>0)){ - _gp=links[_li].pcm_end; - } - if(OP_LIKELY(op_granpos_cmp(_gp,links[_li].pcm_start)>0)){ - if(OP_UNLIKELY(op_granpos_diff(&delta,_gp,links[_li].pcm_start)<0)){ - /*This means an unseekable stream claimed to have a page from more than - 2 billion days after we joined.*/ - OP_ASSERT(!_of->seekable); - return OP_INT64_MAX; - } - if(deltaready_stateprev_packet_gp; - if(gp==-1)return 0; - nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); - OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); - li=_of->seekable?_of->cur_link:0; - if(op_granpos_add(&gp,gp,_of->cur_discard_count)<0){ - gp=_of->links[li].pcm_end; - } - return op_get_pcm_offset(_of,gp,li); -} - -void op_set_decode_callback(OggOpusFile *_of, - op_decode_cb_func _decode_cb,void *_ctx){ - _of->decode_cb=_decode_cb; - _of->decode_cb_ctx=_ctx; -} - -int op_set_gain_offset(OggOpusFile *_of, - int _gain_type,opus_int32 _gain_offset_q8){ - if(_gain_type!=OP_HEADER_GAIN&&_gain_type!=OP_ALBUM_GAIN - &&_gain_type!=OP_TRACK_GAIN&&_gain_type!=OP_ABSOLUTE_GAIN){ - return OP_EINVAL; - } - _of->gain_type=_gain_type; - /*The sum of header gain and track gain lies in the range [-65536,65534]. - These bounds allow the offset to set the final value to anywhere in the - range [-32768,32767], which is what we'll clamp it to before applying.*/ - _of->gain_offset_q8=OP_CLAMP(-98302,_gain_offset_q8,98303); - op_update_gain(_of); - return 0; -} - -void op_set_dither_enabled(OggOpusFile *_of,int _enabled){ -#if !defined(OP_FIXED_POINT) - _of->dither_disabled=!_enabled; - if(!_enabled)_of->dither_mute=65; -#endif -} - -/*Allocate the decoder scratch buffer. - This is done lazily, since if the user provides large enough buffers, we'll - never need it.*/ -static int op_init_buffer(OggOpusFile *_of){ - int nchannels_max; - if(_of->seekable){ - const OggOpusLink *links; - int nlinks; - int li; - links=_of->links; - nlinks=_of->nlinks; - nchannels_max=1; - for(li=0;liod_buffer=(op_sample *)_ogg_malloc( - sizeof(*_of->od_buffer)*nchannels_max*120*48); - if(_of->od_buffer==NULL)return OP_EFAULT; - return 0; -} - -/*Decode a single packet into the target buffer.*/ -static int op_decode(OggOpusFile *_of,op_sample *_pcm, - const ogg_packet *_op,int _nsamples,int _nchannels){ - int ret; - /*First we try using the application-provided decode callback.*/ - if(_of->decode_cb!=NULL){ -#if defined(OP_FIXED_POINT) - ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op, - _nsamples,_nchannels,OP_DEC_FORMAT_SHORT,_of->cur_link); -#else - ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op, - _nsamples,_nchannels,OP_DEC_FORMAT_FLOAT,_of->cur_link); -#endif - } - else ret=OP_DEC_USE_DEFAULT; - /*If the application didn't want to handle decoding, do it ourselves.*/ - if(ret==OP_DEC_USE_DEFAULT){ -#if defined(OP_FIXED_POINT) - ret=opus_multistream_decode(_of->od, - _op->packet,_op->bytes,_pcm,_nsamples,0); -#else - ret=opus_multistream_decode_float(_of->od, - _op->packet,_op->bytes,_pcm,_nsamples,0); -#endif - OP_ASSERT(ret<0||ret==_nsamples); - } - /*If the application returned a positive value other than 0 or - OP_DEC_USE_DEFAULT, fail.*/ - else if(OP_UNLIKELY(ret>0))return OP_EBADPACKET; - if(OP_UNLIKELY(ret<0))return OP_EBADPACKET; - return ret; -} - -/*Read more samples from the stream, using the same API as op_read() or - op_read_float().*/ -static int op_read_native(OggOpusFile *_of, - op_sample *_pcm,int _buf_size,int *_li){ - if(OP_UNLIKELY(_of->ready_stateready_state>=OP_INITSET)){ - int nchannels; - int od_buffer_pos; - int nsamples; - int op_pos; - nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count; - od_buffer_pos=_of->od_buffer_pos; - nsamples=_of->od_buffer_size-od_buffer_pos; - /*If we have buffered samples, return them.*/ - if(nsamples>0){ - if(nsamples*nchannels>_buf_size)nsamples=_buf_size/nchannels; - memcpy(_pcm,_of->od_buffer+nchannels*od_buffer_pos, - sizeof(*_pcm)*nchannels*nsamples); - od_buffer_pos+=nsamples; - _of->od_buffer_pos=od_buffer_pos; - if(_li!=NULL)*_li=_of->cur_link; - return nsamples; - } - /*If we have buffered packets, decode one.*/ - op_pos=_of->op_pos; - if(OP_LIKELY(op_pos<_of->op_count)){ - const ogg_packet *pop; - ogg_int64_t diff; - opus_int32 cur_discard_count; - int duration; - int trimmed_duration; - pop=_of->op+op_pos++; - _of->op_pos=op_pos; - cur_discard_count=_of->cur_discard_count; - duration=op_get_packet_duration(pop->packet,pop->bytes); - /*We don't buffer packets with an invalid TOC sequence.*/ - OP_ASSERT(duration>0); - trimmed_duration=duration; - /*Perform end-trimming.*/ - if(OP_UNLIKELY(pop->e_o_s)){ - if(OP_UNLIKELY(op_granpos_cmp(pop->granulepos, - _of->prev_packet_gp)<=0)){ - trimmed_duration=0; - } - else if(OP_LIKELY(!op_granpos_diff(&diff, - pop->granulepos,_of->prev_packet_gp))){ - trimmed_duration=(int)OP_MIN(diff,trimmed_duration); - } - } - _of->prev_packet_gp=pop->granulepos; - if(OP_UNLIKELY(duration*nchannels>_buf_size)){ - op_sample *buf; - /*If the user's buffer is too small, decode into a scratch buffer.*/ - buf=_of->od_buffer; - if(OP_UNLIKELY(buf==NULL)){ - ret=op_init_buffer(_of); - if(OP_UNLIKELY(ret<0))return ret; - buf=_of->od_buffer; - } - ret=op_decode(_of,buf,pop,duration,nchannels); - if(OP_UNLIKELY(ret<0))return ret; - /*Perform pre-skip/pre-roll.*/ - od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count); - cur_discard_count-=od_buffer_pos; - _of->cur_discard_count=cur_discard_count; - _of->od_buffer_pos=od_buffer_pos; - _of->od_buffer_size=trimmed_duration; - /*Update bitrate tracking based on the actual samples we used from - what was decoded.*/ - _of->bytes_tracked+=pop->bytes; - _of->samples_tracked+=trimmed_duration-od_buffer_pos; - } - else{ - /*Otherwise decode directly into the user's buffer.*/ - ret=op_decode(_of,_pcm,pop,duration,nchannels); - if(OP_UNLIKELY(ret<0))return ret; - if(OP_LIKELY(trimmed_duration>0)){ - /*Perform pre-skip/pre-roll.*/ - od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count); - cur_discard_count-=od_buffer_pos; - _of->cur_discard_count=cur_discard_count; - trimmed_duration-=od_buffer_pos; - if(OP_LIKELY(trimmed_duration>0) - &&OP_UNLIKELY(od_buffer_pos>0)){ - memmove(_pcm,_pcm+od_buffer_pos*nchannels, - sizeof(*_pcm)*trimmed_duration*nchannels); - } - /*Update bitrate tracking based on the actual samples we used from - what was decoded.*/ - _of->bytes_tracked+=pop->bytes; - _of->samples_tracked+=trimmed_duration; - if(OP_LIKELY(trimmed_duration>0)){ - if(_li!=NULL)*_li=_of->cur_link; - return trimmed_duration; - } - } - } - /*Don't grab another page yet. - This one might have more packets, or might have buffered data now.*/ - continue; - } - } - /*Suck in another page.*/ - ret=op_fetch_and_process_page(_of,NULL,-1,1,0); - if(OP_UNLIKELY(ret==OP_EOF)){ - if(_li!=NULL)*_li=_of->cur_link; - return 0; - } - if(OP_UNLIKELY(ret<0))return ret; - } -} - -/*A generic filter to apply to the decoded audio data. - _src is non-const because we will destructively modify the contents of the - source buffer that we consume in some cases.*/ -typedef int (*op_read_filter_func)(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels); - -/*Decode some samples and then apply a custom filter to them. - This is used to convert to different output formats.*/ -static int op_filter_read_native(OggOpusFile *_of,void *_dst,int _dst_sz, - op_read_filter_func _filter,int *_li){ - int ret; - /*Ensure we have some decoded samples in our buffer.*/ - ret=op_read_native(_of,NULL,0,_li); - /*Now apply the filter to them.*/ - if(OP_LIKELY(ret>=0)&&OP_LIKELY(_of->ready_state>=OP_INITSET)){ - int od_buffer_pos; - od_buffer_pos=_of->od_buffer_pos; - ret=_of->od_buffer_size-od_buffer_pos; - if(OP_LIKELY(ret>0)){ - int nchannels; - nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count; - ret=(*_filter)(_of,_dst,_dst_sz, - _of->od_buffer+nchannels*od_buffer_pos,ret,nchannels); - OP_ASSERT(ret>=0); - OP_ASSERT(ret<=_of->od_buffer_size-od_buffer_pos); - od_buffer_pos+=ret; - _of->od_buffer_pos=od_buffer_pos; - } - } - return ret; -} - -#if !defined(OP_FIXED_POINT)||!defined(OP_DISABLE_FLOAT_API) - -/*Matrices for downmixing from the supported channel counts to stereo. - The matrices with 5 or more channels are normalized to a total volume of 2.0, - since most mixes sound too quiet if normalized to 1.0 (as there is generally - little volume in the side/rear channels).*/ -static const float OP_STEREO_DOWNMIX[OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={ - /*3.0*/ - { - {0.5858F,0.0F},{0.4142F,0.4142F},{0.0F,0.5858F} - }, - /*quadrophonic*/ - { - {0.4226F,0.0F},{0.0F,0.4226F},{0.366F,0.2114F},{0.2114F,0.336F} - }, - /*5.0*/ - { - {0.651F,0.0F},{0.46F,0.46F},{0.0F,0.651F},{0.5636F,0.3254F}, - {0.3254F,0.5636F} - }, - /*5.1*/ - { - {0.529F,0.0F},{0.3741F,0.3741F},{0.0F,0.529F},{0.4582F,0.2645F}, - {0.2645F,0.4582F},{0.3741F,0.3741F} - }, - /*6.1*/ - { - {0.4553F,0.0F},{0.322F,0.322F},{0.0F,0.4553F},{0.3943F,0.2277F}, - {0.2277F,0.3943F},{0.2788F,0.2788F},{0.322F,0.322F} - }, - /*7.1*/ - { - {0.3886F,0.0F},{0.2748F,0.2748F},{0.0F,0.3886F},{0.3366F,0.1943F}, - {0.1943F,0.3366F},{0.3366F,0.1943F},{0.1943F,0.3366F},{0.2748F,0.2748F} - } -}; - -#endif - -#if defined(OP_FIXED_POINT) - -/*Matrices for downmixing from the supported channel counts to stereo. - The matrices with 5 or more channels are normalized to a total volume of 2.0, - since most mixes sound too quiet if normalized to 1.0 (as there is generally - little volume in the side/rear channels). - Hence we keep the coefficients in Q14, so the downmix values won't overflow a - 32-bit number.*/ -static const opus_int16 OP_STEREO_DOWNMIX_Q14 - [OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={ - /*3.0*/ - { - {9598,0},{6786,6786},{0,9598} - }, - /*quadrophonic*/ - { - {6924,0},{0,6924},{5996,3464},{3464,5996} - }, - /*5.0*/ - { - {10666,0},{7537,7537},{0,10666},{9234,5331},{5331,9234} - }, - /*5.1*/ - { - {8668,0},{6129,6129},{0,8668},{7507,4335},{4335,7507},{6129,6129} - }, - /*6.1*/ - { - {7459,0},{5275,5275},{0,7459},{6460,3731},{3731,6460},{4568,4568}, - {5275,5275} - }, - /*7.1*/ - { - {6368,0},{4502,4502},{0,6368},{5515,3183},{3183,5515},{5515,3183}, - {3183,5515},{4502,4502} - } -}; - -int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){ - return op_read_native(_of,_pcm,_buf_size,_li); -} - -static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels){ - (void)_of; - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src)); - else{ - opus_int16 *dst; - int i; - dst=(opus_int16 *)_dst; - if(_nchannels==1){ - for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i]; - } - else{ - for(i=0;i<_nsamples;i++){ - opus_int32 l; - opus_int32 r; - int ci; - l=r=0; - for(ci=0;ci<_nchannels;ci++){ - opus_int32 s; - s=_src[_nchannels*i+ci]; - l+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][0]*s; - r+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][1]*s; - } - /*TODO: For 5 or more channels, we should do soft clipping here.*/ - dst[2*i+0]=(opus_int16)OP_CLAMP(-32768,l+8192>>14,32767); - dst[2*i+1]=(opus_int16)OP_CLAMP(-32768,r+8192>>14,32767); - } - } - } - return _nsamples; -} - -int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){ - return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL); -} - -# if !defined(OP_DISABLE_FLOAT_API) - -static int op_short2float_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels){ - float *dst; - int i; - (void)_of; - dst=(float *)_dst; - if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels; - _dst_sz=_nsamples*_nchannels; - for(i=0;i<_dst_sz;i++)dst[i]=(1.0F/32768)*_src[i]; - return _nsamples; -} - -int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){ - return op_filter_read_native(_of,_pcm,_buf_size,op_short2float_filter,_li); -} - -static int op_short2float_stereo_filter(OggOpusFile *_of, - void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){ - float *dst; - int i; - dst=(float *)_dst; - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - if(_nchannels==1){ - _nsamples=op_short2float_filter(_of,dst,_nsamples,_src,_nsamples,1); - for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i]; - } - else if(_nchannels<5){ - /*For 3 or 4 channels, we can downmix in fixed point without risk of - clipping.*/ - if(_nchannels>2){ - _nsamples=op_stereo_filter(_of,_src,_nsamples*2, - _src,_nsamples,_nchannels); - } - return op_short2float_filter(_of,dst,_dst_sz,_src,_nsamples,2); - } - else{ - /*For 5 or more channels, we convert to floats and then downmix (so that we - don't risk clipping).*/ - for(i=0;i<_nsamples;i++){ - float l; - float r; - int ci; - l=r=0; - for(ci=0;ci<_nchannels;ci++){ - float s; - s=(1.0F/32768)*_src[_nchannels*i+ci]; - l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*s; - r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*s; - } - dst[2*i+0]=l; - dst[2*i+1]=r; - } - } - return _nsamples; -} - -int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){ - return op_filter_read_native(_of,_pcm,_buf_size, - op_short2float_stereo_filter,NULL); -} - -# endif - -#else - -# if defined(OP_HAVE_LRINTF) -# include -# define op_float2int(_x) (lrintf(_x)) -# else -# define op_float2int(_x) ((int)((_x)+((_x)<0?-0.5F:0.5F))) -# endif - -/*The dithering code here is adapted from opusdec, part of opus-tools. - It was originally written by Greg Maxwell.*/ - -static opus_uint32 op_rand(opus_uint32 _seed){ - return _seed*96314165+907633515&0xFFFFFFFFU; -} - -/*This implements 16-bit quantization with full triangular dither and IIR noise - shaping. - The noise shaping filters were designed by Sebastian Gesemann, and are based - on the LAME ATH curves with flattening to limit their peak gain to 20 dB. - Everyone else's noise shaping filters are mildly crazy. - The 48 kHz version of this filter is just a warped version of the 44.1 kHz - filter and probably could be improved by shifting the HF shelf up in - frequency a little bit, since 48 kHz has a bit more room and being more - conservative against bat-ears is probably more important than more noise - suppression. - This process can increase the peak level of the signal (in theory by the peak - error of 1.5 +20 dB, though that is unobservably rare). - To avoid clipping, the signal is attenuated by a couple thousandths of a dB. - Initially, the approach taken here was to only attenuate by the 99.9th - percentile, making clipping rare but not impossible (like SoX), but the - limited gain of the filter means that the worst case was only two - thousandths of a dB more, so this just uses the worst case. - The attenuation is probably also helpful to prevent clipping in the DAC - reconstruction filters or downstream resampling, in any case.*/ - -# define OP_GAIN (32753.0F) - -# define OP_PRNG_GAIN (1.0F/0xFFFFFFFF) - -/*48 kHz noise shaping filter, sd=2.34.*/ - -static const float OP_FCOEF_B[4]={ - 2.2374F,-0.7339F,-0.1251F,-0.6033F -}; - -static const float OP_FCOEF_A[4]={ - 0.9030F,0.0116F,-0.5853F,-0.2571F -}; - -static int op_float2short_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - float *_src,int _nsamples,int _nchannels){ - opus_int16 *dst; - int ci; - int i; - dst=(opus_int16 *)_dst; - if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels; -# if defined(OP_SOFT_CLIP) - if(_of->state_channel_count!=_nchannels){ - for(ci=0;ci<_nchannels;ci++)_of->clip_state[ci]=0; - } - opus_pcm_soft_clip(_src,_nsamples,_nchannels,_of->clip_state); -# endif - if(_of->dither_disabled){ - for(i=0;i<_nchannels*_nsamples;i++){ - dst[i]=op_float2int(OP_CLAMP(-32768,32768.0F*_src[i],32767)); - } - } - else{ - opus_uint32 seed; - int mute; - seed=_of->dither_seed; - mute=_of->dither_mute; - if(_of->state_channel_count!=_nchannels)mute=65; - /*In order to avoid replacing digital silence with quiet dither noise, we - mute if the output has been silent for a while.*/ - if(mute>64)memset(_of->dither_a,0,sizeof(*_of->dither_a)*4*_nchannels); - for(i=0;i<_nsamples;i++){ - int silent; - silent=1; - for(ci=0;ci<_nchannels;ci++){ - float r; - float s; - float err; - int si; - int j; - s=_src[_nchannels*i+ci]; - silent&=s==0; - s*=OP_GAIN; - err=0; - for(j=0;j<4;j++){ - err+=OP_FCOEF_B[j]*_of->dither_b[ci*4+j] - -OP_FCOEF_A[j]*_of->dither_a[ci*4+j]; - } - for(j=3;j-->0;)_of->dither_a[ci*4+j+1]=_of->dither_a[ci*4+j]; - for(j=3;j-->0;)_of->dither_b[ci*4+j+1]=_of->dither_b[ci*4+j]; - _of->dither_a[ci*4]=err; - s-=err; - if(mute>16)r=0; - else{ - seed=op_rand(seed); - r=seed*OP_PRNG_GAIN; - seed=op_rand(seed); - r-=seed*OP_PRNG_GAIN; - } - /*Clamp in float out of paranoia that the input will be > 96 dBFS and - wrap if the integer is clamped.*/ - si=op_float2int(OP_CLAMP(-32768,s+r,32767)); - dst[_nchannels*i+ci]=(opus_int16)si; - /*Including clipping in the noise shaping is generally disastrous: the - futile effort to restore the clipped energy results in more clipping. - However, small amounts---at the level which could normally be created - by dither and rounding---are harmless and can even reduce clipping - somewhat due to the clipping sometimes reducing the dither + rounding - error.*/ - _of->dither_b[ci*4]=mute>16?0:OP_CLAMP(-1.5F,si-s,1.5F); - } - mute++; - if(!silent)mute=0; - } - _of->dither_mute=OP_MIN(mute,65); - _of->dither_seed=seed; - } - _of->state_channel_count=_nchannels; - return _nsamples; -} - -int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){ - return op_filter_read_native(_of,_pcm,_buf_size,op_float2short_filter,_li); -} - -int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){ - _of->state_channel_count=0; - return op_read_native(_of,_pcm,_buf_size,_li); -} - -static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels){ - (void)_of; - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src)); - else{ - float *dst; - int i; - dst=(float *)_dst; - if(_nchannels==1){ - for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i]; - } - else{ - for(i=0;i<_nsamples;i++){ - float l; - float r; - int ci; - l=r=0; - for(ci=0;ci<_nchannels;ci++){ - l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*_src[_nchannels*i+ci]; - r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*_src[_nchannels*i+ci]; - } - dst[2*i+0]=l; - dst[2*i+1]=r; - } - } - } - return _nsamples; -} - -static int op_float2short_stereo_filter(OggOpusFile *_of, - void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){ - opus_int16 *dst; - dst=(opus_int16 *)_dst; - if(_nchannels==1){ - int i; - _nsamples=op_float2short_filter(_of,dst,_dst_sz>>1,_src,_nsamples,1); - for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i]; - } - else{ - if(_nchannels>2){ - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - _nsamples=op_stereo_filter(_of,_src,_nsamples*2, - _src,_nsamples,_nchannels); - } - _nsamples=op_float2short_filter(_of,dst,_dst_sz,_src,_nsamples,2); - } - return _nsamples; -} - -int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){ - return op_filter_read_native(_of,_pcm,_buf_size, - op_float2short_stereo_filter,NULL); -} - -int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){ - _of->state_channel_count=0; - return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL); -} - -#endif diff --git a/thirdparty/opus/repacketizer.c b/thirdparty/opus/repacketizer.c deleted file mode 100644 index c80ee7f001..0000000000 --- a/thirdparty/opus/repacketizer.c +++ /dev/null @@ -1,348 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus.h" -#include "opus_private.h" -#include "os_support.h" - - -int opus_repacketizer_get_size(void) -{ - return sizeof(OpusRepacketizer); -} - -OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) -{ - rp->nb_frames = 0; - return rp; -} - -OpusRepacketizer *opus_repacketizer_create(void) -{ - OpusRepacketizer *rp; - rp=(OpusRepacketizer *)opus_alloc(opus_repacketizer_get_size()); - if(rp==NULL)return NULL; - return opus_repacketizer_init(rp); -} - -void opus_repacketizer_destroy(OpusRepacketizer *rp) -{ - opus_free(rp); -} - -static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len, int self_delimited) -{ - unsigned char tmp_toc; - int curr_nb_frames,ret; - /* Set of check ToC */ - if (len<1) return OPUS_INVALID_PACKET; - if (rp->nb_frames == 0) - { - rp->toc = data[0]; - rp->framesize = opus_packet_get_samples_per_frame(data, 8000); - } else if ((rp->toc&0xFC) != (data[0]&0xFC)) - { - /*fprintf(stderr, "toc mismatch: 0x%x vs 0x%x\n", rp->toc, data[0]);*/ - return OPUS_INVALID_PACKET; - } - curr_nb_frames = opus_packet_get_nb_frames(data, len); - if(curr_nb_frames<1) return OPUS_INVALID_PACKET; - - /* Check the 120 ms maximum packet size */ - if ((curr_nb_frames+rp->nb_frames)*rp->framesize > 960) - { - return OPUS_INVALID_PACKET; - } - - ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, &rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL); - if(ret<1)return ret; - - rp->nb_frames += curr_nb_frames; - return OPUS_OK; -} - -int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) -{ - return opus_repacketizer_cat_impl(rp, data, len, 0); -} - -int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) -{ - return rp->nb_frames; -} - -opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, - unsigned char *data, opus_int32 maxlen, int self_delimited, int pad) -{ - int i, count; - opus_int32 tot_size; - opus_int16 *len; - const unsigned char **frames; - unsigned char * ptr; - - if (begin<0 || begin>=end || end>rp->nb_frames) - { - /*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/ - return OPUS_BAD_ARG; - } - count = end-begin; - - len = rp->len+begin; - frames = rp->frames+begin; - if (self_delimited) - tot_size = 1 + (len[count-1]>=252); - else - tot_size = 0; - - ptr = data; - if (count==1) - { - /* Code 0 */ - tot_size += len[0]+1; - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = rp->toc&0xFC; - } else if (count==2) - { - if (len[1] == len[0]) - { - /* Code 1 */ - tot_size += 2*len[0]+1; - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x1; - } else { - /* Code 2 */ - tot_size += len[0]+len[1]+2+(len[0]>=252); - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x2; - ptr += encode_size(len[0], ptr); - } - } - if (count > 2 || (pad && tot_size < maxlen)) - { - /* Code 3 */ - int vbr; - int pad_amount=0; - - /* Restart the process for the padding case */ - ptr = data; - if (self_delimited) - tot_size = 1 + (len[count-1]>=252); - else - tot_size = 0; - vbr = 0; - for (i=1;i=252) + len[i]; - tot_size += len[count-1]; - - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x3; - *ptr++ = count | 0x80; - } else { - tot_size += count*len[0]+2; - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x3; - *ptr++ = count; - } - pad_amount = pad ? (maxlen-tot_size) : 0; - if (pad_amount != 0) - { - int nb_255s; - data[1] |= 0x40; - nb_255s = (pad_amount-1)/255; - for (i=0;inb_frames, data, maxlen, 0, 0); -} - -int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len) -{ - OpusRepacketizer rp; - opus_int32 ret; - if (len < 1) - return OPUS_BAD_ARG; - if (len==new_len) - return OPUS_OK; - else if (len > new_len) - return OPUS_BAD_ARG; - opus_repacketizer_init(&rp); - /* Moving payload to the end of the packet so we can do in-place padding */ - OPUS_MOVE(data+new_len-len, data, len); - ret = opus_repacketizer_cat(&rp, data+new_len-len, len); - if (ret != OPUS_OK) - return ret; - ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1); - if (ret > 0) - return OPUS_OK; - else - return ret; -} - -opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len) -{ - OpusRepacketizer rp; - opus_int32 ret; - if (len < 1) - return OPUS_BAD_ARG; - opus_repacketizer_init(&rp); - ret = opus_repacketizer_cat(&rp, data, len); - if (ret < 0) - return ret; - ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0); - celt_assert(ret > 0 && ret <= len); - return ret; -} - -int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams) -{ - int s; - int count; - unsigned char toc; - opus_int16 size[48]; - opus_int32 packet_offset; - opus_int32 amount; - - if (len < 1) - return OPUS_BAD_ARG; - if (len==new_len) - return OPUS_OK; - else if (len > new_len) - return OPUS_BAD_ARG; - amount = new_len - len; - /* Seek to last stream */ - for (s=0;s -#include -#include - -#define MAX_PACKETOUT 32000 - -void usage(char *argv0) -{ - fprintf(stderr, "usage: %s [options] input_file output_file\n", argv0); -} - -static void int_to_char(opus_uint32 i, unsigned char ch[4]) -{ - ch[0] = i>>24; - ch[1] = (i>>16)&0xFF; - ch[2] = (i>>8)&0xFF; - ch[3] = i&0xFF; -} - -static opus_uint32 char_to_int(unsigned char ch[4]) -{ - return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16) - | ((opus_uint32)ch[2]<< 8) | (opus_uint32)ch[3]; -} - -int main(int argc, char *argv[]) -{ - int i, eof=0; - FILE *fin, *fout; - unsigned char packets[48][1500]; - int len[48]; - int rng[48]; - OpusRepacketizer *rp; - unsigned char output_packet[MAX_PACKETOUT]; - int merge = 1, split=0; - - if (argc < 3) - { - usage(argv[0]); - return EXIT_FAILURE; - } - for (i=1;i48) - { - fprintf(stderr, "-merge parameter must be less than 48.\n"); - return EXIT_FAILURE; - } - i++; - } else if (strcmp(argv[i], "-split")==0) - split = 1; - else - { - fprintf(stderr, "Unknown option: %s\n", argv[i]); - usage(argv[0]); - return EXIT_FAILURE; - } - } - fin = fopen(argv[argc-2], "r"); - if(fin==NULL) - { - fprintf(stderr, "Error opening input file: %s\n", argv[argc-2]); - return EXIT_FAILURE; - } - fout = fopen(argv[argc-1], "w"); - if(fout==NULL) - { - fprintf(stderr, "Error opening output file: %s\n", argv[argc-1]); - fclose(fin); - return EXIT_FAILURE; - } - - rp = opus_repacketizer_create(); - while (!eof) - { - int err; - int nb_packets=merge; - opus_repacketizer_init(rp); - for (i=0;i1500 || len[i]<0) - { - if (feof(fin)) - { - eof = 1; - } else { - fprintf(stderr, "Invalid payload length\n"); - fclose(fin); - fclose(fout); - return EXIT_FAILURE; - } - break; - } - err = fread(ch, 1, 4, fin); - rng[i] = char_to_int(ch); - err = fread(packets[i], 1, len[i], fin); - if (feof(fin)) - { - eof = 1; - break; - } - err = opus_repacketizer_cat(rp, packets[i], len[i]); - if (err!=OPUS_OK) - { - fprintf(stderr, "opus_repacketizer_cat() failed: %s\n", opus_strerror(err)); - break; - } - } - nb_packets = i; - - if (eof) - break; - - if (!split) - { - err = opus_repacketizer_out(rp, output_packet, MAX_PACKETOUT); - if (err>0) { - unsigned char int_field[4]; - int_to_char(err, int_field); - if(fwrite(int_field, 1, 4, fout)!=4){ - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - int_to_char(rng[nb_packets-1], int_field); - if (fwrite(int_field, 1, 4, fout)!=4) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - /*fprintf(stderr, "out len = %d\n", err);*/ - } else { - fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err)); - } - } else { - int nb_frames = opus_repacketizer_get_nb_frames(rp); - for (i=0;i0) { - unsigned char int_field[4]; - int_to_char(err, int_field); - if (fwrite(int_field, 1, 4, fout)!=4) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - if (i==nb_frames-1) - int_to_char(rng[nb_packets-1], int_field); - else - int_to_char(0, int_field); - if (fwrite(int_field, 1, 4, fout)!=4) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - /*fprintf(stderr, "out len = %d\n", err);*/ - } else { - fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err)); - } - - } - } - } - - fclose(fin); - fclose(fout); - return EXIT_SUCCESS; -} diff --git a/thirdparty/opus/silk/A2NLSF.c b/thirdparty/opus/silk/A2NLSF.c deleted file mode 100644 index b6e9e5ffcc..0000000000 --- a/thirdparty/opus/silk/A2NLSF.c +++ /dev/null @@ -1,267 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -/* Conversion between prediction filter coefficients and NLSFs */ -/* Requires the order to be an even number */ -/* A piecewise linear approximation maps LSF <-> cos(LSF) */ -/* Therefore the result is not accurate NLSFs, but the two */ -/* functions are accurate inverses of each other */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "tables.h" - -/* Number of binary divisions, when not in low complexity mode */ -#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */ -#define MAX_ITERATIONS_A2NLSF_FIX 30 - -/* Helper function for A2NLSF(..) */ -/* Transforms polynomials from cos(n*f) to cos(f)^n */ -static OPUS_INLINE void silk_A2NLSF_trans_poly( - opus_int32 *p, /* I/O Polynomial */ - const opus_int dd /* I Polynomial order (= filter order / 2 ) */ -) -{ - opus_int k, n; - - for( k = 2; k <= dd; k++ ) { - for( n = dd; n > k; n-- ) { - p[ n - 2 ] -= p[ n ]; - } - p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 ); - } -} -/* Helper function for A2NLSF(..) */ -/* Polynomial evaluation */ -static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */ - opus_int32 *p, /* I Polynomial, Q16 */ - const opus_int32 x, /* I Evaluation point, Q12 */ - const opus_int dd /* I Order */ -) -{ - opus_int n; - opus_int32 x_Q16, y32; - - y32 = p[ dd ]; /* Q16 */ - x_Q16 = silk_LSHIFT( x, 4 ); - - if ( opus_likely( 8 == dd ) ) - { - y32 = silk_SMLAWW( p[ 7 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 6 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 5 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 4 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 3 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 2 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 1 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 0 ], y32, x_Q16 ); - } - else - { - for( n = dd - 1; n >= 0; n-- ) { - y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */ - } - } - return y32; -} - -static OPUS_INLINE void silk_A2NLSF_init( - const opus_int32 *a_Q16, - opus_int32 *P, - opus_int32 *Q, - const opus_int dd -) -{ - opus_int k; - - /* Convert filter coefs to even and odd polynomials */ - P[dd] = silk_LSHIFT( 1, 16 ); - Q[dd] = silk_LSHIFT( 1, 16 ); - for( k = 0; k < dd; k++ ) { - P[ k ] = -a_Q16[ dd - k - 1 ] - a_Q16[ dd + k ]; /* Q16 */ - Q[ k ] = -a_Q16[ dd - k - 1 ] + a_Q16[ dd + k ]; /* Q16 */ - } - - /* Divide out zeros as we have that for even filter orders, */ - /* z = 1 is always a root in Q, and */ - /* z = -1 is always a root in P */ - for( k = dd; k > 0; k-- ) { - P[ k - 1 ] -= P[ k ]; - Q[ k - 1 ] += Q[ k ]; - } - - /* Transform polynomials from cos(n*f) to cos(f)^n */ - silk_A2NLSF_trans_poly( P, dd ); - silk_A2NLSF_trans_poly( Q, dd ); -} - -/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ -/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */ -void silk_A2NLSF( - opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ - opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ - const opus_int d /* I Filter order (must be even) */ -) -{ - opus_int i, k, m, dd, root_ix, ffrac; - opus_int32 xlo, xhi, xmid; - opus_int32 ylo, yhi, ymid, thr; - opus_int32 nom, den; - opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ]; - opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; - opus_int32 *PQ[ 2 ]; - opus_int32 *p; - - /* Store pointers to array */ - PQ[ 0 ] = P; - PQ[ 1 ] = Q; - - dd = silk_RSHIFT( d, 1 ); - - silk_A2NLSF_init( a_Q16, P, Q, dd ); - - /* Find roots, alternating between P and Q */ - p = P; /* Pointer to polynomial */ - - xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - - if( ylo < 0 ) { - /* Set the first NLSF to zero and move on to the next */ - NLSF[ 0 ] = 0; - p = Q; /* Pointer to polynomial */ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - root_ix = 1; /* Index of current root */ - } else { - root_ix = 0; /* Index of current root */ - } - k = 1; /* Loop counter */ - i = 0; /* Counter for bandwidth expansions applied */ - thr = 0; - while( 1 ) { - /* Evaluate polynomial */ - xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */ - yhi = silk_A2NLSF_eval_poly( p, xhi, dd ); - - /* Detect zero crossing */ - if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) { - if( yhi == 0 ) { - /* If the root lies exactly at the end of the current */ - /* interval, look for the next root in the next interval */ - thr = 1; - } else { - thr = 0; - } - /* Binary division */ - ffrac = -256; - for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) { - /* Evaluate polynomial */ - xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 ); - ymid = silk_A2NLSF_eval_poly( p, xmid, dd ); - - /* Detect zero crossing */ - if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) { - /* Reduce frequency */ - xhi = xmid; - yhi = ymid; - } else { - /* Increase frequency */ - xlo = xmid; - ylo = ymid; - ffrac = silk_ADD_RSHIFT( ffrac, 128, m ); - } - } - - /* Interpolate */ - if( silk_abs( ylo ) < 65536 ) { - /* Avoid dividing by zero */ - den = ylo - yhi; - nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 ); - if( den != 0 ) { - ffrac += silk_DIV32( nom, den ); - } - } else { - /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */ - ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) ); - } - NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX ); - - silk_assert( NLSF[ root_ix ] >= 0 ); - - root_ix++; /* Next root */ - if( root_ix >= d ) { - /* Found all roots */ - break; - } - /* Alternate pointer to polynomial */ - p = PQ[ root_ix & 1 ]; - - /* Evaluate polynomial */ - xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/ - ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 ); - } else { - /* Increment loop counter */ - k++; - xlo = xhi; - ylo = yhi; - thr = 0; - - if( k > LSF_COS_TAB_SZ_FIX ) { - i++; - if( i > MAX_ITERATIONS_A2NLSF_FIX ) { - /* Set NLSFs to white spectrum and exit */ - NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 ); - for( k = 1; k < d; k++ ) { - NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] ); - } - return; - } - - /* Error: Apply progressively more bandwidth expansion and run again */ - silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/ - - silk_A2NLSF_init( a_Q16, P, Q, dd ); - p = P; /* Pointer to polynomial */ - xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - if( ylo < 0 ) { - /* Set the first NLSF to zero and move on to the next */ - NLSF[ 0 ] = 0; - p = Q; /* Pointer to polynomial */ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - root_ix = 1; /* Index of current root */ - } else { - root_ix = 0; /* Index of current root */ - } - k = 1; /* Reset loop counter */ - } - } - } -} diff --git a/thirdparty/opus/silk/API.h b/thirdparty/opus/silk/API.h deleted file mode 100644 index 0131acbb08..0000000000 --- a/thirdparty/opus/silk/API.h +++ /dev/null @@ -1,134 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_API_H -#define SILK_API_H - -#include "control.h" -#include "typedef.h" -#include "errors.h" -#include "entenc.h" -#include "entdec.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define SILK_MAX_FRAMES_PER_PACKET 3 - -/* Struct for TOC (Table of Contents) */ -typedef struct { - opus_int VADFlag; /* Voice activity for packet */ - opus_int VADFlags[ SILK_MAX_FRAMES_PER_PACKET ]; /* Voice activity for each frame in packet */ - opus_int inbandFECFlag; /* Flag indicating if packet contains in-band FEC */ -} silk_TOC_struct; - -/****************************************/ -/* Encoder functions */ -/****************************************/ - -/***********************************************/ -/* Get size in bytes of the Silk encoder state */ -/***********************************************/ -opus_int silk_Get_Encoder_Size( /* O Returns error code */ - opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ -); - -/*************************/ -/* Init or reset encoder */ -/*************************/ -opus_int silk_InitEncoder( /* O Returns error code */ - void *encState, /* I/O State */ - int arch, /* I Run-time architecture */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -); - -/**************************/ -/* Encode frame with Silk */ -/**************************/ -/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ -/* encControl->payloadSize_ms is set to */ -opus_int silk_Encode( /* O Returns error code */ - void *encState, /* I/O State */ - silk_EncControlStruct *encControl, /* I Control status */ - const opus_int16 *samplesIn, /* I Speech sample input vector */ - opus_int nSamplesIn, /* I Number of samples in input vector */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ - const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ -); - -/****************************************/ -/* Decoder functions */ -/****************************************/ - -/***********************************************/ -/* Get size in bytes of the Silk decoder state */ -/***********************************************/ -opus_int silk_Get_Decoder_Size( /* O Returns error code */ - opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ -); - -/*************************/ -/* Init or Reset decoder */ -/*************************/ -opus_int silk_InitDecoder( /* O Returns error code */ - void *decState /* I/O State */ -); - -/******************/ -/* Decode a frame */ -/******************/ -opus_int silk_Decode( /* O Returns error code */ - void* decState, /* I/O State */ - silk_DecControlStruct* decControl, /* I/O Control Structure */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 *samplesOut, /* O Decoded output speech vector */ - opus_int32 *nSamplesOut, /* O Number of samples decoded */ - int arch /* I Run-time architecture */ -); - -#if 0 -/**************************************/ -/* Get table of contents for a packet */ -/**************************************/ -opus_int silk_get_TOC( - const opus_uint8 *payload, /* I Payload data */ - const opus_int nBytesIn, /* I Number of input bytes */ - const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ - silk_TOC_struct *Silk_TOC /* O Type of content */ -); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/CNG.c b/thirdparty/opus/silk/CNG.c deleted file mode 100644 index 8443ad63bb..0000000000 --- a/thirdparty/opus/silk/CNG.c +++ /dev/null @@ -1,184 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/* Generates excitation for CNG LPC synthesis */ -static OPUS_INLINE void silk_CNG_exc( - opus_int32 exc_Q14[], /* O CNG excitation signal Q10 */ - opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ - opus_int length, /* I Length */ - opus_int32 *rand_seed /* I/O Seed to random index generator */ -) -{ - opus_int32 seed; - opus_int i, idx, exc_mask; - - exc_mask = CNG_BUF_MASK_MAX; - while( exc_mask > length ) { - exc_mask = silk_RSHIFT( exc_mask, 1 ); - } - - seed = *rand_seed; - for( i = 0; i < length; i++ ) { - seed = silk_RAND( seed ); - idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); - silk_assert( idx >= 0 ); - silk_assert( idx <= CNG_BUF_MASK_MAX ); - exc_Q14[ i ] = exc_buf_Q14[ idx ]; - } - *rand_seed = seed; -} - -void silk_CNG_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -) -{ - opus_int i, NLSF_step_Q15, NLSF_acc_Q15; - - NLSF_step_Q15 = silk_DIV32_16( silk_int16_MAX, psDec->LPC_order + 1 ); - NLSF_acc_Q15 = 0; - for( i = 0; i < psDec->LPC_order; i++ ) { - NLSF_acc_Q15 += NLSF_step_Q15; - psDec->sCNG.CNG_smth_NLSF_Q15[ i ] = NLSF_acc_Q15; - } - psDec->sCNG.CNG_smth_Gain_Q16 = 0; - psDec->sCNG.rand_seed = 3176576; -} - -/* Updates CNG estimate, and applies the CNG when packet was lost */ -void silk_CNG( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O Signal */ - opus_int length /* I Length of residual */ -) -{ - opus_int i, subfr; - opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10; - opus_int16 A_Q12[ MAX_LPC_ORDER ]; - silk_CNG_struct *psCNG = &psDec->sCNG; - SAVE_STACK; - - if( psDec->fs_kHz != psCNG->fs_kHz ) { - /* Reset state */ - silk_CNG_Reset( psDec ); - - psCNG->fs_kHz = psDec->fs_kHz; - } - if( psDec->lossCnt == 0 && psDec->prevSignalType == TYPE_NO_VOICE_ACTIVITY ) { - /* Update CNG parameters */ - - /* Smoothing of LSF's */ - for( i = 0; i < psDec->LPC_order; i++ ) { - psCNG->CNG_smth_NLSF_Q15[ i ] += silk_SMULWB( (opus_int32)psDec->prevNLSF_Q15[ i ] - (opus_int32)psCNG->CNG_smth_NLSF_Q15[ i ], CNG_NLSF_SMTH_Q16 ); - } - /* Find the subframe with the highest gain */ - max_Gain_Q16 = 0; - subfr = 0; - for( i = 0; i < psDec->nb_subfr; i++ ) { - if( psDecCtrl->Gains_Q16[ i ] > max_Gain_Q16 ) { - max_Gain_Q16 = psDecCtrl->Gains_Q16[ i ]; - subfr = i; - } - } - /* Update CNG excitation buffer with excitation from this subframe */ - silk_memmove( &psCNG->CNG_exc_buf_Q14[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q14, ( psDec->nb_subfr - 1 ) * psDec->subfr_length * sizeof( opus_int32 ) ); - silk_memcpy( psCNG->CNG_exc_buf_Q14, &psDec->exc_Q14[ subfr * psDec->subfr_length ], psDec->subfr_length * sizeof( opus_int32 ) ); - - /* Smooth gains */ - for( i = 0; i < psDec->nb_subfr; i++ ) { - psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 ); - } - } - - /* Add CNG when packet is lost or during DTX */ - if( psDec->lossCnt ) { - VARDECL( opus_int32, CNG_sig_Q14 ); - ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 ); - - /* Generate CNG excitation */ - gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); - if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) { - gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 ); - gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); - gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 ); - } else { - gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 ); - gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); - gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); - } - gain_Q10 = silk_RSHIFT( gain_Q16, 6 ); - - silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed ); - - /* Convert CNG NLSF to filter representation */ - silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); - - /* Generate CNG signal, by synthesis filtering */ - silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - for( i = 0; i < length; i++ ) { - silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); - if( psDec->LPC_order == 16 ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] ); - } - - /* Update states */ - CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); - - /* Scale with Gain and add to input signal */ - frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) ); - - } - silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); - } else { - silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) ); - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/HP_variable_cutoff.c b/thirdparty/opus/silk/HP_variable_cutoff.c deleted file mode 100644 index bbe10f04ce..0000000000 --- a/thirdparty/opus/silk/HP_variable_cutoff.c +++ /dev/null @@ -1,77 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#ifdef FIXED_POINT -#include "main_FIX.h" -#else -#include "main_FLP.h" -#endif -#include "tuning_parameters.h" - -/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ -void silk_HP_variable_cutoff( - silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ -) -{ - opus_int quality_Q15; - opus_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7; - silk_encoder_state *psEncC1 = &state_Fxx[ 0 ].sCmn; - - /* Adaptive cutoff frequency: estimate low end of pitch frequency range */ - if( psEncC1->prevSignalType == TYPE_VOICED ) { - /* difference, in log domain */ - pitch_freq_Hz_Q16 = silk_DIV32_16( silk_LSHIFT( silk_MUL( psEncC1->fs_kHz, 1000 ), 16 ), psEncC1->prevLag ); - pitch_freq_log_Q7 = silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 ); - - /* adjustment based on quality */ - quality_Q15 = psEncC1->input_quality_bands_Q15[ 0 ]; - pitch_freq_log_Q7 = silk_SMLAWB( pitch_freq_log_Q7, silk_SMULWB( silk_LSHIFT( -quality_Q15, 2 ), quality_Q15 ), - pitch_freq_log_Q7 - ( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ) ) ); - - /* delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; */ - delta_freq_Q7 = pitch_freq_log_Q7 - silk_RSHIFT( psEncC1->variable_HP_smth1_Q15, 8 ); - if( delta_freq_Q7 < 0 ) { - /* less smoothing for decreasing pitch frequency, to track something close to the minimum */ - delta_freq_Q7 = silk_MUL( delta_freq_Q7, 3 ); - } - - /* limit delta, to reduce impact of outliers in pitch estimation */ - delta_freq_Q7 = silk_LIMIT_32( delta_freq_Q7, -SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) ); - - /* update smoother */ - psEncC1->variable_HP_smth1_Q15 = silk_SMLAWB( psEncC1->variable_HP_smth1_Q15, - silk_SMULBB( psEncC1->speech_activity_Q8, delta_freq_Q7 ), SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF1, 16 ) ); - - /* limit frequency range */ - psEncC1->variable_HP_smth1_Q15 = silk_LIMIT_32( psEncC1->variable_HP_smth1_Q15, - silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ), - silk_LSHIFT( silk_lin2log( VARIABLE_HP_MAX_CUTOFF_HZ ), 8 ) ); - } -} diff --git a/thirdparty/opus/silk/Inlines.h b/thirdparty/opus/silk/Inlines.h deleted file mode 100644 index ec986cdfdd..0000000000 --- a/thirdparty/opus/silk/Inlines.h +++ /dev/null @@ -1,188 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -/*! \file silk_Inlines.h - * \brief silk_Inlines.h defines OPUS_INLINE signal processing functions. - */ - -#ifndef SILK_FIX_INLINES_H -#define SILK_FIX_INLINES_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* count leading zeros of opus_int64 */ -static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in ) -{ - opus_int32 in_upper; - - in_upper = (opus_int32)silk_RSHIFT64(in, 32); - if (in_upper == 0) { - /* Search in the lower 32 bits */ - return 32 + silk_CLZ32( (opus_int32) in ); - } else { - /* Search in the upper 32 bits */ - return silk_CLZ32( in_upper ); - } -} - -/* get number of leading zeros and fractional part (the bits right after the leading one */ -static OPUS_INLINE void silk_CLZ_FRAC( - opus_int32 in, /* I input */ - opus_int32 *lz, /* O number of leading zeros */ - opus_int32 *frac_Q7 /* O the 7 bits right after the leading one */ -) -{ - opus_int32 lzeros = silk_CLZ32(in); - - * lz = lzeros; - * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f; -} - -/* Approximation of square root */ -/* Accuracy: < +/- 10% for output values > 15 */ -/* < +/- 2.5% for output values > 120 */ -static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x ) -{ - opus_int32 y, lz, frac_Q7; - - if( x <= 0 ) { - return 0; - } - - silk_CLZ_FRAC(x, &lz, &frac_Q7); - - if( lz & 1 ) { - y = 32768; - } else { - y = 46214; /* 46214 = sqrt(2) * 32768 */ - } - - /* get scaling right */ - y >>= silk_RSHIFT(lz, 1); - - /* increment using fractional part of input */ - y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7)); - - return y; -} - -/* Divide two int32 values and return result as int32 in a given Q-domain */ -static OPUS_INLINE opus_int32 silk_DIV32_varQ( /* O returns a good approximation of "(a32 << Qres) / b32" */ - const opus_int32 a32, /* I numerator (Q0) */ - const opus_int32 b32, /* I denominator (Q0) */ - const opus_int Qres /* I Q-domain of result (>= 0) */ -) -{ - opus_int a_headrm, b_headrm, lshift; - opus_int32 b32_inv, a32_nrm, b32_nrm, result; - - silk_assert( b32 != 0 ); - silk_assert( Qres >= 0 ); - - /* Compute number of bits head room and normalize inputs */ - a_headrm = silk_CLZ32( silk_abs(a32) ) - 1; - a32_nrm = silk_LSHIFT(a32, a_headrm); /* Q: a_headrm */ - b_headrm = silk_CLZ32( silk_abs(b32) ) - 1; - b32_nrm = silk_LSHIFT(b32, b_headrm); /* Q: b_headrm */ - - /* Inverse of b32, with 14 bits of precision */ - b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) ); /* Q: 29 + 16 - b_headrm */ - - /* First approximation */ - result = silk_SMULWB(a32_nrm, b32_inv); /* Q: 29 + a_headrm - b_headrm */ - - /* Compute residual by subtracting product of denominator and first approximation */ - /* It's OK to overflow because the final value of a32_nrm should always be small */ - a32_nrm = silk_SUB32_ovflw(a32_nrm, silk_LSHIFT_ovflw( silk_SMMUL(b32_nrm, result), 3 )); /* Q: a_headrm */ - - /* Refinement */ - result = silk_SMLAWB(result, a32_nrm, b32_inv); /* Q: 29 + a_headrm - b_headrm */ - - /* Convert to Qres domain */ - lshift = 29 + a_headrm - b_headrm - Qres; - if( lshift < 0 ) { - return silk_LSHIFT_SAT32(result, -lshift); - } else { - if( lshift < 32){ - return silk_RSHIFT(result, lshift); - } else { - /* Avoid undefined result */ - return 0; - } - } -} - -/* Invert int32 value and return result as int32 in a given Q-domain */ -static OPUS_INLINE opus_int32 silk_INVERSE32_varQ( /* O returns a good approximation of "(1 << Qres) / b32" */ - const opus_int32 b32, /* I denominator (Q0) */ - const opus_int Qres /* I Q-domain of result (> 0) */ -) -{ - opus_int b_headrm, lshift; - opus_int32 b32_inv, b32_nrm, err_Q32, result; - - silk_assert( b32 != 0 ); - silk_assert( Qres > 0 ); - - /* Compute number of bits head room and normalize input */ - b_headrm = silk_CLZ32( silk_abs(b32) ) - 1; - b32_nrm = silk_LSHIFT(b32, b_headrm); /* Q: b_headrm */ - - /* Inverse of b32, with 14 bits of precision */ - b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) ); /* Q: 29 + 16 - b_headrm */ - - /* First approximation */ - result = silk_LSHIFT(b32_inv, 16); /* Q: 61 - b_headrm */ - - /* Compute residual by subtracting product of denominator and first approximation from one */ - err_Q32 = silk_LSHIFT( ((opus_int32)1<<29) - silk_SMULWB(b32_nrm, b32_inv), 3 ); /* Q32 */ - - /* Refinement */ - result = silk_SMLAWW(result, err_Q32, b32_inv); /* Q: 61 - b_headrm */ - - /* Convert to Qres domain */ - lshift = 61 - b_headrm - Qres; - if( lshift <= 0 ) { - return silk_LSHIFT_SAT32(result, -lshift); - } else { - if( lshift < 32){ - return silk_RSHIFT(result, lshift); - }else{ - /* Avoid undefined result */ - return 0; - } - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_FIX_INLINES_H */ diff --git a/thirdparty/opus/silk/LPC_analysis_filter.c b/thirdparty/opus/silk/LPC_analysis_filter.c deleted file mode 100644 index 20906673ff..0000000000 --- a/thirdparty/opus/silk/LPC_analysis_filter.c +++ /dev/null @@ -1,108 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "celt_lpc.h" - -/*******************************************/ -/* LPC analysis filter */ -/* NB! State is kept internally and the */ -/* filter always starts with zero state */ -/* first d output samples are set to zero */ -/*******************************************/ - -void silk_LPC_analysis_filter( - opus_int16 *out, /* O Output signal */ - const opus_int16 *in, /* I Input signal */ - const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ - const opus_int32 len, /* I Signal length */ - const opus_int32 d, /* I Filter order */ - int arch /* I Run-time architecture */ -) -{ - opus_int j; -#ifdef FIXED_POINT - opus_int16 mem[SILK_MAX_ORDER_LPC]; - opus_int16 num[SILK_MAX_ORDER_LPC]; -#else - int ix; - opus_int32 out32_Q12, out32; - const opus_int16 *in_ptr; -#endif - - silk_assert( d >= 6 ); - silk_assert( (d & 1) == 0 ); - silk_assert( d <= len ); - -#ifdef FIXED_POINT - silk_assert( d <= SILK_MAX_ORDER_LPC ); - for ( j = 0; j < d; j++ ) { - num[ j ] = -B[ j ]; - } - for (j=0;j 0; k-- ) { - /* Check for stability */ - if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) { - return 0; - } - - /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA ); - - /* rc_mult1_Q30 range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */ - silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) ); - - /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */ - mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) ); - rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 ); - - /* Update inverse gain */ - /* invGain_Q30 range: [ 0 : 2^30 ] */ - invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= ( 1 << 30 ) ); - - /* Swap pointers */ - Aold_QA = Anew_QA; - Anew_QA = A_QA[ k & 1 ]; - - /* Update AR coefficient */ - for( n = 0; n < k; n++ ) { - tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 ); - Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q ); - } - } - - /* Check for stability */ - if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) { - return 0; - } - - /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA ); - - /* Range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - - /* Update inverse gain */ - /* Range: [ 0 : 2^30 ] */ - invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= 1<<30 ); - - return invGain_Q30; -} - -/* For input in Q12 domain */ -opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -) -{ - opus_int k; - opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ]; - opus_int32 *Anew_QA; - opus_int32 DC_resp = 0; - - Anew_QA = Atmp_QA[ order & 1 ]; - - /* Increase Q domain of the AR coefficients */ - for( k = 0; k < order; k++ ) { - DC_resp += (opus_int32)A_Q12[ k ]; - Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 ); - } - /* If the DC is unstable, we don't even need to do the full calculations */ - if( DC_resp >= 4096 ) { - return 0; - } - return LPC_inverse_pred_gain_QA( Atmp_QA, order ); -} - -#ifdef FIXED_POINT - -/* For input in Q24 domain */ -opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ - const opus_int order /* I Prediction order */ -) -{ - opus_int k; - opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ]; - opus_int32 *Anew_QA; - - Anew_QA = Atmp_QA[ order & 1 ]; - - /* Increase Q domain of the AR coefficients */ - for( k = 0; k < order; k++ ) { - Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA ); - } - - return LPC_inverse_pred_gain_QA( Atmp_QA, order ); -} -#endif diff --git a/thirdparty/opus/silk/LP_variable_cutoff.c b/thirdparty/opus/silk/LP_variable_cutoff.c deleted file mode 100644 index f639e1f899..0000000000 --- a/thirdparty/opus/silk/LP_variable_cutoff.c +++ /dev/null @@ -1,135 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* - Elliptic/Cauer filters designed with 0.1 dB passband ripple, - 80 dB minimum stopband attenuation, and - [0.95 : 0.15 : 0.35] normalized cut off frequencies. -*/ - -#include "main.h" - -/* Helper function, interpolates the filter taps */ -static OPUS_INLINE void silk_LP_interpolate_filter_taps( - opus_int32 B_Q28[ TRANSITION_NB ], - opus_int32 A_Q28[ TRANSITION_NA ], - const opus_int ind, - const opus_int32 fac_Q16 -) -{ - opus_int nb, na; - - if( ind < TRANSITION_INT_NUM - 1 ) { - if( fac_Q16 > 0 ) { - if( fac_Q16 < 32768 ) { /* fac_Q16 is in range of a 16-bit int */ - /* Piece-wise linear interpolation of B and A */ - for( nb = 0; nb < TRANSITION_NB; nb++ ) { - B_Q28[ nb ] = silk_SMLAWB( - silk_Transition_LP_B_Q28[ ind ][ nb ], - silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] - - silk_Transition_LP_B_Q28[ ind ][ nb ], - fac_Q16 ); - } - for( na = 0; na < TRANSITION_NA; na++ ) { - A_Q28[ na ] = silk_SMLAWB( - silk_Transition_LP_A_Q28[ ind ][ na ], - silk_Transition_LP_A_Q28[ ind + 1 ][ na ] - - silk_Transition_LP_A_Q28[ ind ][ na ], - fac_Q16 ); - } - } else { /* ( fac_Q16 - ( 1 << 16 ) ) is in range of a 16-bit int */ - silk_assert( fac_Q16 - ( 1 << 16 ) == silk_SAT16( fac_Q16 - ( 1 << 16 ) ) ); - /* Piece-wise linear interpolation of B and A */ - for( nb = 0; nb < TRANSITION_NB; nb++ ) { - B_Q28[ nb ] = silk_SMLAWB( - silk_Transition_LP_B_Q28[ ind + 1 ][ nb ], - silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] - - silk_Transition_LP_B_Q28[ ind ][ nb ], - fac_Q16 - ( (opus_int32)1 << 16 ) ); - } - for( na = 0; na < TRANSITION_NA; na++ ) { - A_Q28[ na ] = silk_SMLAWB( - silk_Transition_LP_A_Q28[ ind + 1 ][ na ], - silk_Transition_LP_A_Q28[ ind + 1 ][ na ] - - silk_Transition_LP_A_Q28[ ind ][ na ], - fac_Q16 - ( (opus_int32)1 << 16 ) ); - } - } - } else { - silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( opus_int32 ) ); - silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( opus_int32 ) ); - } - } else { - silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( opus_int32 ) ); - silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( opus_int32 ) ); - } -} - -/* Low-pass filter with variable cutoff frequency based on */ -/* piece-wise linear interpolation between elliptic filters */ -/* Start by setting psEncC->mode <> 0; */ -/* Deactivate by setting psEncC->mode = 0; */ -void silk_LP_variable_cutoff( - silk_LP_state *psLP, /* I/O LP filter state */ - opus_int16 *frame, /* I/O Low-pass filtered output signal */ - const opus_int frame_length /* I Frame length */ -) -{ - opus_int32 B_Q28[ TRANSITION_NB ], A_Q28[ TRANSITION_NA ], fac_Q16 = 0; - opus_int ind = 0; - - silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES ); - - /* Run filter if needed */ - if( psLP->mode != 0 ) { - /* Calculate index and interpolation factor for interpolation */ -#if( TRANSITION_INT_STEPS == 64 ) - fac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 ); -#else - fac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES ); -#endif - ind = silk_RSHIFT( fac_Q16, 16 ); - fac_Q16 -= silk_LSHIFT( ind, 16 ); - - silk_assert( ind >= 0 ); - silk_assert( ind < TRANSITION_INT_NUM ); - - /* Interpolate filter coefficients */ - silk_LP_interpolate_filter_taps( B_Q28, A_Q28, ind, fac_Q16 ); - - /* Update transition frame number for next frame */ - psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES ); - - /* ARMA low-pass filtering */ - silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 ); - silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1); - } -} diff --git a/thirdparty/opus/silk/MacroCount.h b/thirdparty/opus/silk/MacroCount.h deleted file mode 100644 index 834817d058..0000000000 --- a/thirdparty/opus/silk/MacroCount.h +++ /dev/null @@ -1,718 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SIGPROCFIX_API_MACROCOUNT_H -#define SIGPROCFIX_API_MACROCOUNT_H -#include - -#ifdef silk_MACRO_COUNT -#define varDefine opus_int64 ops_count = 0; - -extern opus_int64 ops_count; - -static OPUS_INLINE opus_int64 silk_SaveCount(){ - return(ops_count); -} - -static OPUS_INLINE opus_int64 silk_SaveResetCount(){ - opus_int64 ret; - - ret = ops_count; - ops_count = 0; - return(ret); -} - -static OPUS_INLINE silk_PrintCount(){ - printf("ops_count = %d \n ", (opus_int32)ops_count); -} - -#undef silk_MUL -static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 4; - ret = a32 * b32; - return ret; -} - -#undef silk_MUL_uint -static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){ - opus_uint32 ret; - ops_count += 4; - ret = a32 * b32; - return ret; -} -#undef silk_MLA -static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 4; - ret = a32 + b32 * c32; - return ret; -} - -#undef silk_MLA_uint -static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){ - opus_uint32 ret; - ops_count += 4; - ret = a32 + b32 * c32; - return ret; -} - -#undef silk_SMULWB -static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 5; - ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); - return ret; -} -#undef silk_SMLAWB -static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 5; - ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))); - return ret; -} - -#undef silk_SMULWT -static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 4; - ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); - return ret; -} -#undef silk_SMLAWT -static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 4; - ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); - return ret; -} - -#undef silk_SMULBB -static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 1; - ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32); - return ret; -} -#undef silk_SMLABB -static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 1; - ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); - return ret; -} - -#undef silk_SMULBT -static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){ - opus_int32 ret; - ops_count += 4; - ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16); - return ret; -} - -#undef silk_SMLABT -static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 1; - ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); - return ret; -} - -#undef silk_SMULTT -static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 1; - ret = (a32 >> 16) * (b32 >> 16); - return ret; -} - -#undef silk_SMLATT -static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 1; - ret = a32 + (b32 >> 16) * (c32 >> 16); - return ret; -} - - -/* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/ -#undef silk_MLA_ovflw -#define silk_MLA_ovflw silk_MLA - -#undef silk_SMLABB_ovflw -#define silk_SMLABB_ovflw silk_SMLABB - -#undef silk_SMLABT_ovflw -#define silk_SMLABT_ovflw silk_SMLABT - -#undef silk_SMLATT_ovflw -#define silk_SMLATT_ovflw silk_SMLATT - -#undef silk_SMLAWB_ovflw -#define silk_SMLAWB_ovflw silk_SMLAWB - -#undef silk_SMLAWT_ovflw -#define silk_SMLAWT_ovflw silk_SMLAWT - -#undef silk_SMULL -static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){ - opus_int64 ret; - ops_count += 8; - ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32)); - return ret; -} - -#undef silk_SMLAL -static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){ - opus_int64 ret; - ops_count += 8; - ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32)); - return ret; -} -#undef silk_SMLALBB -static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){ - opus_int64 ret; - ops_count += 4; - ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16)); - return ret; -} - -#undef SigProcFIX_CLZ16 -static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16) -{ - opus_int32 out32 = 0; - ops_count += 10; - if( in16 == 0 ) { - return 16; - } - /* test nibbles */ - if( in16 & 0xFF00 ) { - if( in16 & 0xF000 ) { - in16 >>= 12; - } else { - out32 += 4; - in16 >>= 8; - } - } else { - if( in16 & 0xFFF0 ) { - out32 += 8; - in16 >>= 4; - } else { - out32 += 12; - } - } - /* test bits and return */ - if( in16 & 0xC ) { - if( in16 & 0x8 ) - return out32 + 0; - else - return out32 + 1; - } else { - if( in16 & 0xE ) - return out32 + 2; - else - return out32 + 3; - } -} - -#undef SigProcFIX_CLZ32 -static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32) -{ - /* test highest 16 bits and convert to opus_int16 */ - ops_count += 2; - if( in32 & 0xFFFF0000 ) { - return SigProcFIX_CLZ16((opus_int16)(in32 >> 16)); - } else { - return SigProcFIX_CLZ16((opus_int16)in32) + 16; - } -} - -#undef silk_DIV32 -static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){ - ops_count += 64; - return a32 / b32; -} - -#undef silk_DIV32_16 -static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){ - ops_count += 32; - return a32 / b32; -} - -#undef silk_SAT8 -static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){ - opus_int8 tmp; - ops_count += 1; - tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX : \ - ((a) < silk_int8_MIN ? silk_int8_MIN : (a))); - return(tmp); -} - -#undef silk_SAT16 -static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){ - opus_int16 tmp; - ops_count += 1; - tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX : \ - ((a) < silk_int16_MIN ? silk_int16_MIN : (a))); - return(tmp); -} -#undef silk_SAT32 -static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){ - opus_int32 tmp; - ops_count += 1; - tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : \ - ((a) < silk_int32_MIN ? silk_int32_MIN : (a))); - return(tmp); -} -#undef silk_POS_SAT32 -static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){ - opus_int32 tmp; - ops_count += 1; - tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a)); - return(tmp); -} - -#undef silk_ADD_POS_SAT8 -static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){ - opus_int8 tmp; - ops_count += 1; - tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))); - return(tmp); -} -#undef silk_ADD_POS_SAT16 -static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){ - opus_int16 tmp; - ops_count += 1; - tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))); - return(tmp); -} - -#undef silk_ADD_POS_SAT32 -static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){ - opus_int32 tmp; - ops_count += 1; - tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))); - return(tmp); -} - -#undef silk_ADD_POS_SAT64 -static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){ - opus_int64 tmp; - ops_count += 1; - tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b))); - return(tmp); -} - -#undef silk_LSHIFT8 -static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){ - opus_int8 ret; - ops_count += 1; - ret = a << shift; - return ret; -} -#undef silk_LSHIFT16 -static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){ - opus_int16 ret; - ops_count += 1; - ret = a << shift; - return ret; -} -#undef silk_LSHIFT32 -static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a << shift; - return ret; -} -#undef silk_LSHIFT64 -static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){ - ops_count += 1; - return a << shift; -} - -#undef silk_LSHIFT_ovflw -static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){ - ops_count += 1; - return a << shift; -} - -#undef silk_LSHIFT_uint -static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){ - opus_uint32 ret; - ops_count += 1; - ret = a << shift; - return ret; -} - -#undef silk_RSHIFT8 -static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} -#undef silk_RSHIFT16 -static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} -#undef silk_RSHIFT32 -static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} -#undef silk_RSHIFT64 -static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){ - ops_count += 1; - return a >> shift; -} - -#undef silk_RSHIFT_uint -static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} - -#undef silk_ADD_LSHIFT -static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_ADD_LSHIFT32 -static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_ADD_LSHIFT_uint -static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ - opus_uint32 ret; - ops_count += 1; - ret = a + (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_ADD_RSHIFT -static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b >> shift); - return ret; /* shift > 0*/ -} -#undef silk_ADD_RSHIFT32 -static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b >> shift); - return ret; /* shift > 0*/ -} -#undef silk_ADD_RSHIFT_uint -static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ - opus_uint32 ret; - ops_count += 1; - ret = a + (b >> shift); - return ret; /* shift > 0*/ -} -#undef silk_SUB_LSHIFT32 -static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a - (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_SUB_RSHIFT32 -static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a - (b >> shift); - return ret; /* shift > 0*/ -} - -#undef silk_RSHIFT_ROUND -static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){ - opus_int32 ret; - ops_count += 3; - ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - return ret; -} - -#undef silk_RSHIFT_ROUND64 -static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){ - opus_int64 ret; - ops_count += 6; - ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - return ret; -} - -#undef silk_abs_int64 -static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){ - ops_count += 1; - return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/ -} - -#undef silk_abs_int32 -static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){ - ops_count += 1; - return silk_abs(a); -} - - -#undef silk_min -static silk_min(a, b){ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} -#undef silk_max -static silk_max(a, b){ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} -#undef silk_sign -static silk_sign(a){ - ops_count += 1; - return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )); -} - -#undef silk_ADD16 -static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){ - opus_int16 ret; - ops_count += 1; - ret = a + b; - return ret; -} - -#undef silk_ADD32 -static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){ - opus_int32 ret; - ops_count += 1; - ret = a + b; - return ret; -} - -#undef silk_ADD64 -static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){ - opus_int64 ret; - ops_count += 2; - ret = a + b; - return ret; -} - -#undef silk_SUB16 -static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){ - opus_int16 ret; - ops_count += 1; - ret = a - b; - return ret; -} - -#undef silk_SUB32 -static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){ - opus_int32 ret; - ops_count += 1; - ret = a - b; - return ret; -} - -#undef silk_SUB64 -static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){ - opus_int64 ret; - ops_count += 2; - ret = a - b; - return ret; -} - -#undef silk_ADD_SAT16 -static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) { - opus_int16 res; - /* Nb will be counted in AKP_add32 and silk_SAT16*/ - res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); - return res; -} - -#undef silk_ADD_SAT32 -static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){ - opus_int32 res; - ops_count += 1; - res = ((((a32) + (b32)) & 0x80000000) == 0 ? \ - ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ - ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) ); - return res; -} - -#undef silk_ADD_SAT64 -static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) { - opus_int64 res; - ops_count += 1; - res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ - ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ - ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); - return res; -} - -#undef silk_SUB_SAT16 -static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) { - opus_int16 res; - silk_assert(0); - /* Nb will be counted in sub-macros*/ - res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); - return res; -} - -#undef silk_SUB_SAT32 -static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) { - opus_int32 res; - ops_count += 1; - res = ((((a32)-(b32)) & 0x80000000) == 0 ? \ - (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ - ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); - return res; -} - -#undef silk_SUB_SAT64 -static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) { - opus_int64 res; - ops_count += 1; - res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ - (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ - ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) ); - - return res; -} - -#undef silk_SMULWW -static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - /* Nb will be counted in sub-macros*/ - ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)); - return ret; -} - -#undef silk_SMLAWW -static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - /* Nb will be counted in sub-macros*/ - ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)); - return ret; -} - -#undef silk_min_int -static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} - -#undef silk_min_16 -static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} -#undef silk_min_32 -static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} -#undef silk_min_64 -static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} - -/* silk_min() versions with typecast in the function call */ -#undef silk_max_int -static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} -#undef silk_max_16 -static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} -#undef silk_max_32 -static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} - -#undef silk_max_64 -static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} - - -#undef silk_LIMIT_int -static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2) -{ - opus_int ret; - ops_count += 6; - - ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); - - return(ret); -} - -#undef silk_LIMIT_16 -static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2) -{ - opus_int16 ret; - ops_count += 6; - - ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); - -return(ret); -} - - -#undef silk_LIMIT_32 -static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) -{ - opus_int32 ret; - ops_count += 6; - - ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); - return(ret); -} - -#else -#define varDefine -#define silk_SaveCount() - -#endif -#endif - diff --git a/thirdparty/opus/silk/MacroDebug.h b/thirdparty/opus/silk/MacroDebug.h deleted file mode 100644 index 35aedc5c5f..0000000000 --- a/thirdparty/opus/silk/MacroDebug.h +++ /dev/null @@ -1,952 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Copyright (C) 2012 Xiph.Org Foundation -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef MACRO_DEBUG_H -#define MACRO_DEBUG_H - -/* Redefine macro functions with extensive assertion in DEBUG mode. - As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */ - -#if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT) - -#undef silk_ADD16 -#define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){ - opus_int16 ret; - - ret = a + b; - if ( ret != silk_ADD_SAT16( a, b ) ) - { - fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_ADD32 -#define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){ - opus_int32 ret; - - ret = a + b; - if ( ret != silk_ADD_SAT32( a, b ) ) - { - fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_ADD64 -#define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){ - opus_int64 ret; - - ret = a + b; - if ( ret != silk_ADD_SAT64( a, b ) ) - { - fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SUB16 -#define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){ - opus_int16 ret; - - ret = a - b; - if ( ret != silk_SUB_SAT16( a, b ) ) - { - fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SUB32 -#define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){ - opus_int32 ret; - - ret = a - b; - if ( ret != silk_SUB_SAT32( a, b ) ) - { - fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SUB64 -#define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){ - opus_int64 ret; - - ret = a - b; - if ( ret != silk_SUB_SAT64( a, b ) ) - { - fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_ADD_SAT16 -#define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) { - opus_int16 res; - res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); - if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) ) - { - fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_ADD_SAT32 -#define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 res; - res = ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ? \ - ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ - ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) ); - if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) ) - { - fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_ADD_SAT64 -#define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) { - opus_int64 res; - int fail = 0; - res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ - ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ - ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); - if( res != a64 + b64 ) { - /* Check that we saturated to the correct extreme value */ - if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || - ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ) ) - { - fail = 1; - } - } else { - /* Saturation not necessary */ - fail = res != a64 + b64; - } - if ( fail ) - { - fprintf (stderr, "silk_ADD_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_SUB_SAT16 -#define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) { - opus_int16 res; - res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); - if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) ) - { - fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_SUB_SAT32 -#define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) { - opus_int32 res; - res = ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ? \ - (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ - ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); - if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) ) - { - fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_SUB_SAT64 -#define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) { - opus_int64 res; - int fail = 0; - res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ - (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ - ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) ); - if( res != a64 - b64 ) { - /* Check that we saturated to the correct extreme value */ - if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || - ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) )) - { - fail = 1; - } - } else { - /* Saturation not necessary */ - fail = res != a64 - b64; - } - if ( fail ) - { - fprintf (stderr, "silk_SUB_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_MUL -#define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret; - opus_int64 ret64; - ret = a32 * b32; - ret64 = (opus_int64)a32 * (opus_int64)b32; - if ( (opus_int64)ret != ret64 ) - { - fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_MUL_uint -#define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){ - opus_uint32 ret; - ret = a32 * b32; - if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 ) - { - fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_MLA -#define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + b32 * c32; - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) - { - fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_MLA_uint -#define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){ - opus_uint32 ret; - ret = a32 + b32 * c32; - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) - { - fprintf (stderr, "silk_MLA_uint(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULWB -#define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret; - ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); - if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 ) - { - fprintf (stderr, "silk_SMULWB(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMLAWB -#define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) ); - if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) ) - { - fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULWT -#define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret; - ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); - if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 ) - { - fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMLAWT -#define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); - if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) ) - { - fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULL -#define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){ - opus_int64 ret64; - int fail = 0; - ret64 = a64 * b64; - if( b64 != 0 ) { - fail = a64 != (ret64 / b64); - } else if( a64 != 0 ) { - fail = b64 != (ret64 / a64); - } - if ( fail ) - { - fprintf (stderr, "silk_SMULL(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret64; -} - -/* no checking needed for silk_SMULBB */ -#undef silk_SMLABB -#define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 ) - { - fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -/* no checking needed for silk_SMULBT */ -#undef silk_SMLABT -#define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) ) - { - fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -/* no checking needed for silk_SMULTT */ -#undef silk_SMLATT -#define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + (b32 >> 16) * (c32 >> 16); - if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) ) - { - fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULWW -#define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret, tmp1, tmp2; - opus_int64 ret64; - int fail = 0; - - ret = silk_SMULWB( a32, b32 ); - tmp1 = silk_RSHIFT_ROUND( b32, 16 ); - tmp2 = silk_MUL( a32, tmp1 ); - - fail |= (opus_int64)tmp2 != (opus_int64) a32 * (opus_int64) tmp1; - - tmp1 = ret; - ret = silk_ADD32( tmp1, tmp2 ); - fail |= silk_ADD32( tmp1, tmp2 ) != silk_ADD_SAT32( tmp1, tmp2 ); - - ret64 = silk_RSHIFT64( silk_SMULL( a32, b32 ), 16 ); - fail |= (opus_int64)ret != ret64; - - if ( fail ) - { - fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - - return ret; -} - -#undef silk_SMLAWW -#define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret, tmp; - - tmp = silk_SMULWW( b32, c32 ); - ret = silk_ADD32( a32, tmp ); - if ( ret != silk_ADD_SAT32( a32, tmp ) ) - { - fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ -#undef silk_MLA_ovflw -#define silk_MLA_ovflw(a32, b32, c32) ((a32) + ((b32) * (c32))) -#undef silk_SMLABB_ovflw -#define silk_SMLABB_ovflw(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) - -/* no checking needed for silk_SMULL - no checking needed for silk_SMLAL - no checking needed for silk_SMLALBB - no checking needed for SigProcFIX_CLZ16 - no checking needed for SigProcFIX_CLZ32*/ - -#undef silk_DIV32 -#define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){ - if ( b32 == 0 ) - { - fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a32 / b32; -} - -#undef silk_DIV32_16 -#define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){ - int fail = 0; - fail |= b32 == 0; - fail |= b32 > silk_int16_MAX; - fail |= b32 < silk_int16_MIN; - if ( fail ) - { - fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a32 / b32; -} - -/* no checking needed for silk_SAT8 - no checking needed for silk_SAT16 - no checking needed for silk_SAT32 - no checking needed for silk_POS_SAT32 - no checking needed for silk_ADD_POS_SAT8 - no checking needed for silk_ADD_POS_SAT16 - no checking needed for silk_ADD_POS_SAT32 - no checking needed for silk_ADD_POS_SAT64 */ - -#undef silk_LSHIFT8 -#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ - opus_int8 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 8; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT16 -#define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ - opus_int16 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 16; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT32 -#define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ - opus_int32 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 32; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT64 -#define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){ - opus_int64 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 64; - fail |= (ret>>shift) != ((opus_int64)a); - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT_ovflw -#define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */ - { - fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a << shift; -} - -#undef silk_LSHIFT_uint -#define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ - opus_uint32 ret; - ret = a << shift; - if ( (shift < 0) || ((opus_int64)ret != ((opus_int64)a) << shift)) - { - fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_RSHIFT8 -#define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>=8) ) - { - fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT16 -#define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>=16) ) - { - fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT32 -#define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>=32) ) - { - fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT64 -#define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){ - if ( (shift < 0) || (shift>=64) ) - { - fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT_uint -#define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>32) ) - { - fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_ADD_LSHIFT -#define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){ - opus_int16 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_ADD_LSHIFT32 -#define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_ADD_LSHIFT_uint -#define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ - opus_uint32 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_ADD_RSHIFT -#define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){ - opus_int16 ret; - ret = a + (b >> shift); - if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_ADD_RSHIFT32 -#define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a + (b >> shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_ADD_RSHIFT_uint -#define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ - opus_uint32 ret; - ret = a + (b >> shift); - if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_SUB_LSHIFT32 -#define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a - (b << shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_SUB_RSHIFT32 -#define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a - (b >> shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_RSHIFT_ROUND -#define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - /* the marco definition can't handle a shift of zero */ - if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) ) - { - fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_RSHIFT_ROUND64 -#define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){ - opus_int64 ret; - /* the marco definition can't handle a shift of zero */ - if ( (shift <= 0) || (shift>=64) ) - { - fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - return ret; -} - -/* silk_abs is used on floats also, so doesn't work... */ -/*#undef silk_abs -static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){ - silk_assert(a != 0x80000000); - return (((a) > 0) ? (a) : -(a)); // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN -}*/ - -#undef silk_abs_int64 -#define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){ - if ( a == silk_int64_MIN ) - { - fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */ -} - -#undef silk_abs_int32 -#define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){ - if ( a == silk_int32_MIN ) - { - fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return silk_abs(a); -} - -#undef silk_CHECK_FIT8 -#define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){ - opus_int8 ret; - ret = (opus_int8)a; - if ( (opus_int64)ret != a ) - { - fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return( ret ); -} - -#undef silk_CHECK_FIT16 -#define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){ - opus_int16 ret; - ret = (opus_int16)a; - if ( (opus_int64)ret != a ) - { - fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return( ret ); -} - -#undef silk_CHECK_FIT32 -#define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){ - opus_int32 ret; - ret = (opus_int32)a; - if ( (opus_int64)ret != a ) - { - fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return( ret ); -} - -/* no checking for silk_NSHIFT_MUL_32_32 - no checking for silk_NSHIFT_MUL_16_16 - no checking needed for silk_min - no checking needed for silk_max - no checking needed for silk_sign -*/ - -#endif -#endif /* MACRO_DEBUG_H */ diff --git a/thirdparty/opus/silk/NLSF2A.c b/thirdparty/opus/silk/NLSF2A.c deleted file mode 100644 index b1c559ea68..0000000000 --- a/thirdparty/opus/silk/NLSF2A.c +++ /dev/null @@ -1,178 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* conversion between prediction filter coefficients and LSFs */ -/* order should be even */ -/* a piecewise linear approximation maps LSF <-> cos(LSF) */ -/* therefore the result is not accurate LSFs, but the two */ -/* functions are accurate inverses of each other */ - -#include "SigProc_FIX.h" -#include "tables.h" - -#define QA 16 - -/* helper function for NLSF2A(..) */ -static OPUS_INLINE void silk_NLSF2A_find_poly( - opus_int32 *out, /* O intermediate polynomial, QA [dd+1] */ - const opus_int32 *cLSF, /* I vector of interleaved 2*cos(LSFs), QA [d] */ - opus_int dd /* I polynomial order (= 1/2 * filter order) */ -) -{ - opus_int k, n; - opus_int32 ftmp; - - out[0] = silk_LSHIFT( 1, QA ); - out[1] = -cLSF[0]; - for( k = 1; k < dd; k++ ) { - ftmp = cLSF[2*k]; /* QA*/ - out[k+1] = silk_LSHIFT( out[k-1], 1 ) - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[k] ), QA ); - for( n = k; n > 1; n-- ) { - out[n] += out[n-2] - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[n-1] ), QA ); - } - out[1] -= ftmp; - } -} - -/* compute whitening filter coefficients from normalized line spectral frequencies */ -void silk_NLSF2A( - opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ - const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ - const opus_int d /* I filter order (should be even) */ -) -{ - /* This ordering was found to maximize quality. It improves numerical accuracy of - silk_NLSF2A_find_poly() compared to "standard" ordering. */ - static const unsigned char ordering16[16] = { - 0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1 - }; - static const unsigned char ordering10[10] = { - 0, 9, 6, 3, 4, 5, 8, 1, 2, 7 - }; - const unsigned char *ordering; - opus_int k, i, dd; - opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; - opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta; - opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ]; - opus_int32 maxabs, absval, idx=0, sc_Q16; - - silk_assert( LSF_COS_TAB_SZ_FIX == 128 ); - silk_assert( d==10||d==16 ); - - /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */ - ordering = d == 16 ? ordering16 : ordering10; - for( k = 0; k < d; k++ ) { - silk_assert(NLSF[k] >= 0 ); - - /* f_int on a scale 0-127 (rounded down) */ - f_int = silk_RSHIFT( NLSF[k], 15 - 7 ); - - /* f_frac, range: 0..255 */ - f_frac = NLSF[k] - silk_LSHIFT( f_int, 15 - 7 ); - - silk_assert(f_int >= 0); - silk_assert(f_int < LSF_COS_TAB_SZ_FIX ); - - /* Read start and end value from table */ - cos_val = silk_LSFCosTab_FIX_Q12[ f_int ]; /* Q12 */ - delta = silk_LSFCosTab_FIX_Q12[ f_int + 1 ] - cos_val; /* Q12, with a range of 0..200 */ - - /* Linear interpolation */ - cos_LSF_QA[ordering[k]] = silk_RSHIFT_ROUND( silk_LSHIFT( cos_val, 8 ) + silk_MUL( delta, f_frac ), 20 - QA ); /* QA */ - } - - dd = silk_RSHIFT( d, 1 ); - - /* generate even and odd polynomials using convolution */ - silk_NLSF2A_find_poly( P, &cos_LSF_QA[ 0 ], dd ); - silk_NLSF2A_find_poly( Q, &cos_LSF_QA[ 1 ], dd ); - - /* convert even and odd polynomials to opus_int32 Q12 filter coefs */ - for( k = 0; k < dd; k++ ) { - Ptmp = P[ k+1 ] + P[ k ]; - Qtmp = Q[ k+1 ] - Q[ k ]; - - /* the Ptmp and Qtmp values at this stage need to fit in int32 */ - a32_QA1[ k ] = -Qtmp - Ptmp; /* QA+1 */ - a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */ - } - - /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */ - for( i = 0; i < 10; i++ ) { - /* Find maximum absolute value and its index */ - maxabs = 0; - for( k = 0; k < d; k++ ) { - absval = silk_abs( a32_QA1[k] ); - if( absval > maxabs ) { - maxabs = absval; - idx = k; - } - } - maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */ - - if( maxabs > silk_int16_MAX ) { - /* Reduce magnitude of prediction coefficients */ - maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */ - sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ), - silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) ); - silk_bwexpander_32( a32_QA1, d, sc_Q16 ); - } else { - break; - } - } - - if( i == 10 ) { - /* Reached the last iteration, clip the coefficients */ - for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */ - a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 ); - } - } else { - for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ - } - } - - for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) { - if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) { - /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */ - /* on the unscaled coefficients, convert to Q12 and measure again */ - silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) ); - for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ - } - } else { - break; - } - } -} - diff --git a/thirdparty/opus/silk/NLSF_VQ.c b/thirdparty/opus/silk/NLSF_VQ.c deleted file mode 100644 index 69b6e22e18..0000000000 --- a/thirdparty/opus/silk/NLSF_VQ.c +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ -void silk_NLSF_VQ( - opus_int32 err_Q26[], /* O Quantization errors [K] */ - const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ - const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ - const opus_int K, /* I Number of codebook vectors */ - const opus_int LPC_order /* I Number of LPCs */ -) -{ - opus_int i, m; - opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26; - - silk_assert( LPC_order <= 16 ); - silk_assert( ( LPC_order & 1 ) == 0 ); - - /* Loop over codebook */ - for( i = 0; i < K; i++ ) { - sum_error_Q26 = 0; - for( m = 0; m < LPC_order; m += 2 ) { - /* Compute weighted squared quantization error for index m */ - diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ - sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 ); - - /* Compute weighted squared quantization error for index m + 1 */ - diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ - sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 ); - - sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 ); - - silk_assert( sum_error_Q26 >= 0 ); - silk_assert( sum_error_Q30 >= 0 ); - } - err_Q26[ i ] = sum_error_Q26; - } -} diff --git a/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c b/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c deleted file mode 100644 index 04894c59ab..0000000000 --- a/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c +++ /dev/null @@ -1,80 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "define.h" -#include "SigProc_FIX.h" - -/* -R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP -Parameters Using Structured Vector Quantization", Proc. IEEE Int. Conf. Acoust., Speech, -Signal Processing, pp. 641-644, 1991. -*/ - -/* Laroia low complexity NLSF weights */ -void silk_NLSF_VQ_weights_laroia( - opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */ - const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */ - const opus_int D /* I Input vector dimension (even) */ -) -{ - opus_int k; - opus_int32 tmp1_int, tmp2_int; - - silk_assert( D > 0 ); - silk_assert( ( D & 1 ) == 0 ); - - /* First value */ - tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 ); - tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); - tmp2_int = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 ); - tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int ); - pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 ); - - /* Main loop */ - for( k = 1; k < D - 1; k += 2 ) { - tmp1_int = silk_max_int( pNLSF_Q15[ k + 1 ] - pNLSF_Q15[ k ], 1 ); - tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); - pNLSFW_Q_OUT[ k ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ k ] > 0 ); - - tmp2_int = silk_max_int( pNLSF_Q15[ k + 2 ] - pNLSF_Q15[ k + 1 ], 1 ); - tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int ); - pNLSFW_Q_OUT[ k + 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ k + 1 ] > 0 ); - } - - /* Last value */ - tmp1_int = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 ); - tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); - pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 ); -} diff --git a/thirdparty/opus/silk/NLSF_decode.c b/thirdparty/opus/silk/NLSF_decode.c deleted file mode 100644 index 9f715060b8..0000000000 --- a/thirdparty/opus/silk/NLSF_decode.c +++ /dev/null @@ -1,101 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Predictive dequantizer for NLSF residuals */ -static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */ - opus_int16 x_Q10[], /* O Output [ order ] */ - const opus_int8 indices[], /* I Quantization indices [ order ] */ - const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ - const opus_int quant_step_size_Q16, /* I Quantization step size */ - const opus_int16 order /* I Number of input values */ -) -{ - opus_int i, out_Q10, pred_Q10; - - out_Q10 = 0; - for( i = order-1; i >= 0; i-- ) { - pred_Q10 = silk_RSHIFT( silk_SMULBB( out_Q10, (opus_int16)pred_coef_Q8[ i ] ), 8 ); - out_Q10 = silk_LSHIFT( indices[ i ], 10 ); - if( out_Q10 > 0 ) { - out_Q10 = silk_SUB16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( out_Q10 < 0 ) { - out_Q10 = silk_ADD16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } - out_Q10 = silk_SMLAWB( pred_Q10, (opus_int32)out_Q10, quant_step_size_Q16 ); - x_Q10[ i ] = out_Q10; - } -} - - -/***********************/ -/* NLSF vector decoder */ -/***********************/ -void silk_NLSF_decode( - opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ -) -{ - opus_int i; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - opus_int16 res_Q10[ MAX_LPC_ORDER ]; - opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; - opus_int32 W_tmp_Q9, NLSF_Q15_tmp; - const opus_uint8 *pCB_element; - - /* Decode first stage */ - pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; - for( i = 0; i < psNLSF_CB->order; i++ ) { - pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 ); - } - - /* Unpack entropy table indices and predictor for current CB1 index */ - silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] ); - - /* Predictive residual dequantizer */ - silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order ); - - /* Weights from codebook vector */ - silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order ); - - /* Apply inverse square-rooted weights and add to output */ - for( i = 0; i < psNLSF_CB->order; i++ ) { - W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); - NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) ); - pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 ); - } - - /* NLSF stabilization */ - silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); -} diff --git a/thirdparty/opus/silk/NLSF_del_dec_quant.c b/thirdparty/opus/silk/NLSF_del_dec_quant.c deleted file mode 100644 index de88fee060..0000000000 --- a/thirdparty/opus/silk/NLSF_del_dec_quant.c +++ /dev/null @@ -1,217 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Delayed-decision quantizer for NLSF residuals */ -opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */ - opus_int8 indices[], /* O Quantization indices [ order ] */ - const opus_int16 x_Q10[], /* I Input [ order ] */ - const opus_int16 w_Q5[], /* I Weights [ order ] */ - const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ - const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */ - const opus_uint8 ec_rates_Q5[], /* I Rates [] */ - const opus_int quant_step_size_Q16, /* I Quantization step size */ - const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */ - const opus_int32 mu_Q20, /* I R/D tradeoff */ - const opus_int16 order /* I Number of input values */ -) -{ - opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10; - opus_int pred_Q10, diff_Q10, rate0_Q5, rate1_Q5; - opus_int16 out0_Q10, out1_Q10; - opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25; - opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ]; - opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ]; - opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ]; - opus_int32 RD_Q25[ 2 * NLSF_QUANT_DEL_DEC_STATES ]; - opus_int32 RD_min_Q25[ NLSF_QUANT_DEL_DEC_STATES ]; - opus_int32 RD_max_Q25[ NLSF_QUANT_DEL_DEC_STATES ]; - const opus_uint8 *rates_Q5; - - opus_int out0_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT]; - opus_int out1_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT]; - - for (i = -NLSF_QUANT_MAX_AMPLITUDE_EXT; i <= NLSF_QUANT_MAX_AMPLITUDE_EXT-1; i++) - { - out0_Q10 = silk_LSHIFT( i, 10 ); - out1_Q10 = silk_ADD16( out0_Q10, 1024 ); - if( i > 0 ) { - out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( i == 0 ) { - out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( i == -1 ) { - out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else { - out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } - out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 ); - out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 ); - } - - silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */ - - nStates = 1; - RD_Q25[ 0 ] = 0; - prev_out_Q10[ 0 ] = 0; - for( i = order - 1; ; i-- ) { - rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ]; - in_Q10 = x_Q10[ i ]; - for( j = 0; j < nStates; j++ ) { - pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 ); - res_Q10 = silk_SUB16( in_Q10, pred_Q10 ); - ind_tmp = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 ); - ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 ); - ind[ j ][ i ] = (opus_int8)ind_tmp; - - /* compute outputs for ind_tmp and ind_tmp + 1 */ - out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ]; - out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ]; - - out0_Q10 = silk_ADD16( out0_Q10, pred_Q10 ); - out1_Q10 = silk_ADD16( out1_Q10, pred_Q10 ); - prev_out_Q10[ j ] = out0_Q10; - prev_out_Q10[ j + nStates ] = out1_Q10; - - /* compute RD for ind_tmp and ind_tmp + 1 */ - if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) { - if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) { - rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ]; - rate1_Q5 = 280; - } else { - rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp ); - rate1_Q5 = silk_ADD16( rate0_Q5, 43 ); - } - } else if( ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) { - if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) { - rate0_Q5 = 280; - rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ]; - } else { - rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp ); - rate1_Q5 = silk_SUB16( rate0_Q5, 43 ); - } - } else { - rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ]; - rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ]; - } - RD_tmp_Q25 = RD_Q25[ j ]; - diff_Q10 = silk_SUB16( in_Q10, out0_Q10 ); - RD_Q25[ j ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 ); - diff_Q10 = silk_SUB16( in_Q10, out1_Q10 ); - RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 ); - } - - if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) { - /* double number of states and copy */ - for( j = 0; j < nStates; j++ ) { - ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1; - } - nStates = silk_LSHIFT( nStates, 1 ); - for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - ind[ j ][ i ] = ind[ j - nStates ][ i ]; - } - } else if( i > 0 ) { - /* sort lower and upper half of RD_Q25, pairwise */ - for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) { - RD_max_Q25[ j ] = RD_Q25[ j ]; - RD_min_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ]; - RD_Q25[ j ] = RD_min_Q25[ j ]; - RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ]; - /* swap prev_out values */ - out0_Q10 = prev_out_Q10[ j ]; - prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ]; - prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10; - ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES; - } else { - RD_min_Q25[ j ] = RD_Q25[ j ]; - RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ]; - ind_sort[ j ] = j; - } - } - /* compare the highest RD values of the winning half with the lowest one in the losing half, and copy if necessary */ - /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */ - while( 1 ) { - min_max_Q25 = silk_int32_MAX; - max_min_Q25 = 0; - ind_min_max = 0; - ind_max_min = 0; - for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - if( min_max_Q25 > RD_max_Q25[ j ] ) { - min_max_Q25 = RD_max_Q25[ j ]; - ind_min_max = j; - } - if( max_min_Q25 < RD_min_Q25[ j ] ) { - max_min_Q25 = RD_min_Q25[ j ]; - ind_max_min = j; - } - } - if( min_max_Q25 >= max_min_Q25 ) { - break; - } - /* copy ind_min_max to ind_max_min */ - ind_sort[ ind_max_min ] = ind_sort[ ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES; - RD_Q25[ ind_max_min ] = RD_Q25[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ]; - prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ]; - RD_min_Q25[ ind_max_min ] = 0; - RD_max_Q25[ ind_min_max ] = silk_int32_MAX; - silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) ); - } - /* increment index if it comes from the upper half */ - for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 ); - } - } else { /* i == 0 */ - break; - } - } - - /* last sample: find winner, copy indices and return RD value */ - ind_tmp = 0; - min_Q25 = silk_int32_MAX; - for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) { - if( min_Q25 > RD_Q25[ j ] ) { - min_Q25 = RD_Q25[ j ]; - ind_tmp = j; - } - } - for( j = 0; j < order; j++ ) { - indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ]; - silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT ); - silk_assert( indices[ j ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT ); - } - indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 ); - silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT ); - silk_assert( min_Q25 >= 0 ); - return min_Q25; -} diff --git a/thirdparty/opus/silk/NLSF_encode.c b/thirdparty/opus/silk/NLSF_encode.c deleted file mode 100644 index f03c3f1c35..0000000000 --- a/thirdparty/opus/silk/NLSF_encode.c +++ /dev/null @@ -1,137 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/***********************/ -/* NLSF vector encoder */ -/***********************/ -opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */ - const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */ - const opus_int nSurvivors, /* I Max survivors after first stage */ - const opus_int signalType /* I Signal type: 0/1/2 */ -) -{ - opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7; - opus_int32 W_tmp_Q9, ret; - VARDECL( opus_int32, err_Q26 ); - VARDECL( opus_int32, RD_Q25 ); - VARDECL( opus_int, tempIndices1 ); - VARDECL( opus_int8, tempIndices2 ); - opus_int16 res_Q15[ MAX_LPC_ORDER ]; - opus_int16 res_Q10[ MAX_LPC_ORDER ]; - opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ]; - opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; - opus_int16 W_adj_Q5[ MAX_LPC_ORDER ]; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - const opus_uint8 *pCB_element, *iCDF_ptr; - SAVE_STACK; - - silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS ); - silk_assert( signalType >= 0 && signalType <= 2 ); - silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 ); - - /* NLSF stabilization */ - silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); - - /* First stage: VQ */ - ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 ); - silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order ); - - /* Sort the quantization errors */ - ALLOC( tempIndices1, nSurvivors, opus_int ); - silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors ); - - ALLOC( RD_Q25, nSurvivors, opus_int32 ); - ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 ); - - /* Loop over survivors */ - for( s = 0; s < nSurvivors; s++ ) { - ind1 = tempIndices1[ s ]; - - /* Residual after first stage */ - pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ]; - for( i = 0; i < psNLSF_CB->order; i++ ) { - NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 ); - res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ]; - } - - /* Weights from codebook vector */ - silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order ); - - /* Apply square-rooted weights */ - for( i = 0; i < psNLSF_CB->order; i++ ) { - W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); - res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 ); - } - - /* Modify input weights accordingly */ - for( i = 0; i < psNLSF_CB->order; i++ ) { - W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] ); - } - - /* Unpack entropy table indices and predictor for current CB1 index */ - silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, ind1 ); - - /* Trellis quantizer */ - RD_Q25[ s ] = silk_NLSF_del_dec_quant( &tempIndices2[ s * MAX_LPC_ORDER ], res_Q10, W_adj_Q5, pred_Q8, ec_ix, - psNLSF_CB->ec_Rates_Q5, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->invQuantStepSize_Q6, NLSF_mu_Q20, psNLSF_CB->order ); - - /* Add rate for first stage */ - iCDF_ptr = &psNLSF_CB->CB1_iCDF[ ( signalType >> 1 ) * psNLSF_CB->nVectors ]; - if( ind1 == 0 ) { - prob_Q8 = 256 - iCDF_ptr[ ind1 ]; - } else { - prob_Q8 = iCDF_ptr[ ind1 - 1 ] - iCDF_ptr[ ind1 ]; - } - bits_q7 = ( 8 << 7 ) - silk_lin2log( prob_Q8 ); - RD_Q25[ s ] = silk_SMLABB( RD_Q25[ s ], bits_q7, silk_RSHIFT( NLSF_mu_Q20, 2 ) ); - } - - /* Find the lowest rate-distortion error */ - silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 ); - - NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ]; - silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) ); - - /* Decode */ - silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB ); - - ret = RD_Q25[ 0 ]; - RESTORE_STACK; - return ret; -} diff --git a/thirdparty/opus/silk/NLSF_stabilize.c b/thirdparty/opus/silk/NLSF_stabilize.c deleted file mode 100644 index 8f3426b91e..0000000000 --- a/thirdparty/opus/silk/NLSF_stabilize.c +++ /dev/null @@ -1,142 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* NLSF stabilizer: */ -/* */ -/* - Moves NLSFs further apart if they are too close */ -/* - Moves NLSFs away from borders if they are too close */ -/* - High effort to achieve a modification with minimum */ -/* Euclidean distance to input vector */ -/* - Output are sorted NLSF coefficients */ -/* */ - -#include "SigProc_FIX.h" - -/* Constant Definitions */ -#define MAX_LOOPS 20 - -/* NLSF stabilizer, for a single input data vector */ -void silk_NLSF_stabilize( - opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */ - const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */ - const opus_int L /* I Number of NLSF parameters in the input vector */ -) -{ - opus_int i, I=0, k, loops; - opus_int16 center_freq_Q15; - opus_int32 diff_Q15, min_diff_Q15, min_center_Q15, max_center_Q15; - - /* This is necessary to ensure an output within range of a opus_int16 */ - silk_assert( NDeltaMin_Q15[L] >= 1 ); - - for( loops = 0; loops < MAX_LOOPS; loops++ ) { - /**************************/ - /* Find smallest distance */ - /**************************/ - /* First element */ - min_diff_Q15 = NLSF_Q15[0] - NDeltaMin_Q15[0]; - I = 0; - /* Middle elements */ - for( i = 1; i <= L-1; i++ ) { - diff_Q15 = NLSF_Q15[i] - ( NLSF_Q15[i-1] + NDeltaMin_Q15[i] ); - if( diff_Q15 < min_diff_Q15 ) { - min_diff_Q15 = diff_Q15; - I = i; - } - } - /* Last element */ - diff_Q15 = ( 1 << 15 ) - ( NLSF_Q15[L-1] + NDeltaMin_Q15[L] ); - if( diff_Q15 < min_diff_Q15 ) { - min_diff_Q15 = diff_Q15; - I = L; - } - - /***************************************************/ - /* Now check if the smallest distance non-negative */ - /***************************************************/ - if( min_diff_Q15 >= 0 ) { - return; - } - - if( I == 0 ) { - /* Move away from lower limit */ - NLSF_Q15[0] = NDeltaMin_Q15[0]; - - } else if( I == L) { - /* Move away from higher limit */ - NLSF_Q15[L-1] = ( 1 << 15 ) - NDeltaMin_Q15[L]; - - } else { - /* Find the lower extreme for the location of the current center frequency */ - min_center_Q15 = 0; - for( k = 0; k < I; k++ ) { - min_center_Q15 += NDeltaMin_Q15[k]; - } - min_center_Q15 += silk_RSHIFT( NDeltaMin_Q15[I], 1 ); - - /* Find the upper extreme for the location of the current center frequency */ - max_center_Q15 = 1 << 15; - for( k = L; k > I; k-- ) { - max_center_Q15 -= NDeltaMin_Q15[k]; - } - max_center_Q15 -= silk_RSHIFT( NDeltaMin_Q15[I], 1 ); - - /* Move apart, sorted by value, keeping the same center frequency */ - center_freq_Q15 = (opus_int16)silk_LIMIT_32( silk_RSHIFT_ROUND( (opus_int32)NLSF_Q15[I-1] + (opus_int32)NLSF_Q15[I], 1 ), - min_center_Q15, max_center_Q15 ); - NLSF_Q15[I-1] = center_freq_Q15 - silk_RSHIFT( NDeltaMin_Q15[I], 1 ); - NLSF_Q15[I] = NLSF_Q15[I-1] + NDeltaMin_Q15[I]; - } - } - - /* Safe and simple fall back method, which is less ideal than the above */ - if( loops == MAX_LOOPS ) - { - /* Insertion sort (fast for already almost sorted arrays): */ - /* Best case: O(n) for an already sorted array */ - /* Worst case: O(n^2) for an inversely sorted array */ - silk_insertion_sort_increasing_all_values_int16( &NLSF_Q15[0], L ); - - /* First NLSF should be no less than NDeltaMin[0] */ - NLSF_Q15[0] = silk_max_int( NLSF_Q15[0], NDeltaMin_Q15[0] ); - - /* Keep delta_min distance between the NLSFs */ - for( i = 1; i < L; i++ ) - NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], silk_ADD_SAT16( NLSF_Q15[i-1], NDeltaMin_Q15[i] ) ); - - /* Last NLSF should be no higher than 1 - NDeltaMin[L] */ - NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] ); - - /* Keep NDeltaMin distance between the NLSFs */ - for( i = L-2; i >= 0; i-- ) - NLSF_Q15[i] = silk_min_int( NLSF_Q15[i], NLSF_Q15[i+1] - NDeltaMin_Q15[i+1] ); - } -} diff --git a/thirdparty/opus/silk/NLSF_unpack.c b/thirdparty/opus/silk/NLSF_unpack.c deleted file mode 100644 index 17bd23f752..0000000000 --- a/thirdparty/opus/silk/NLSF_unpack.c +++ /dev/null @@ -1,55 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Unpack predictor values and indices for entropy coding tables */ -void silk_NLSF_unpack( - opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */ - opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int CB1_index /* I Index of vector in first LSF codebook */ -) -{ - opus_int i; - opus_uint8 entry; - const opus_uint8 *ec_sel_ptr; - - ec_sel_ptr = &psNLSF_CB->ec_sel[ CB1_index * psNLSF_CB->order / 2 ]; - for( i = 0; i < psNLSF_CB->order; i += 2 ) { - entry = *ec_sel_ptr++; - ec_ix [ i ] = silk_SMULBB( silk_RSHIFT( entry, 1 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 ); - pred_Q8[ i ] = psNLSF_CB->pred_Q8[ i + ( entry & 1 ) * ( psNLSF_CB->order - 1 ) ]; - ec_ix [ i + 1 ] = silk_SMULBB( silk_RSHIFT( entry, 5 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 ); - pred_Q8[ i + 1 ] = psNLSF_CB->pred_Q8[ i + ( silk_RSHIFT( entry, 4 ) & 1 ) * ( psNLSF_CB->order - 1 ) + 1 ]; - } -} - diff --git a/thirdparty/opus/silk/NSQ.c b/thirdparty/opus/silk/NSQ.c deleted file mode 100644 index 43e3fee7e0..0000000000 --- a/thirdparty/opus/silk/NSQ.c +++ /dev/null @@ -1,429 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "NSQ.h" - - -static OPUS_INLINE void silk_nsq_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -); - -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE void silk_noise_shape_quantizer( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - int arch /* I Architecture */ -); -#endif - -void silk_NSQ_c -( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int k, lag, start_idx, LSF_interpolation_flag; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - SAVE_STACK; - - NSQ->rand_seed = psIndices->Seed; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - /* Set up pointers to start of sub frame */ - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->rewhite_flag = 1; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - } - } - - silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); - - silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, - offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Update lagPrev for next frame */ - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech and noise shaping signals */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/***********************************/ -/* silk_noise_shape_quantizer */ -/***********************************/ - -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE -#endif -void silk_noise_shape_quantizer( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - int arch /* I Architecture */ -) -{ - opus_int i; - opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; - opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; - opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; -#ifdef silk_short_prediction_create_arch_coef - opus_int32 a_Q12_arch[MAX_LPC_ORDER]; -#endif - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - /* Set up short term AR state */ - psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; - -#ifdef silk_short_prediction_create_arch_coef - silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder); -#endif - - for( i = 0; i < length; i++ ) { - /* Generate dither */ - NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); - - /* Short-term prediction */ - LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch); - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q13 = 2; - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - pred_lag_ptr++; - } else { - LTP_pred_Q13 = 0; - } - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch); - - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 ); - - n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); - n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 ); - - silk_assert( lag > 0 || signalType != TYPE_VOICED ); - - /* Combine prediction and noise shaping signals */ - tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ - tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); - shp_lag_ptr++; - - tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */ - tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */ - } else { - tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */ - } - - r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( NSQ->rand_seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* Q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q20 = silk_SMLABB( rd1_Q20, rr_Q10, rr_Q10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q20 = silk_SMLABB( rd2_Q20, rr_Q10, rr_Q10 ); - - if( rd2_Q20 < rd1_Q20 ) { - q1_Q10 = q2_Q10; - } - - pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 ); - - /* Excitation */ - exc_Q14 = silk_LSHIFT( q1_Q10, 4 ); - if ( NSQ->rand_seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 ); - xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 ); - - /* Scale XQ back to normal level before saving */ - xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( xq_Q14, Gain_Q10 ), 8 ) ); - - /* Update states */ - psLPC_Q14++; - *psLPC_Q14 = xq_Q14; - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); - NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; - - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); - sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Make dither dependent on quantized signal */ - NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); - } - - /* Update LPC synth buffer */ - silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); -} - -static OPUS_INLINE void silk_nsq_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -) -{ - opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - for( i = 0; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); - } - } -} diff --git a/thirdparty/opus/silk/NSQ.h b/thirdparty/opus/silk/NSQ.h deleted file mode 100644 index 971832f660..0000000000 --- a/thirdparty/opus/silk/NSQ.h +++ /dev/null @@ -1,101 +0,0 @@ -/*********************************************************************** -Copyright (c) 2014 Vidyo. -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef SILK_NSQ_H -#define SILK_NSQ_H - -#include "SigProc_FIX.h" - -#undef silk_short_prediction_create_arch_coef - -static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_c(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order) -{ - opus_int32 out; - silk_assert( order == 10 || order == 16 ); - - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - out = silk_RSHIFT( order, 1 ); - out = silk_SMLAWB( out, buf32[ 0 ], coef16[ 0 ] ); - out = silk_SMLAWB( out, buf32[ -1 ], coef16[ 1 ] ); - out = silk_SMLAWB( out, buf32[ -2 ], coef16[ 2 ] ); - out = silk_SMLAWB( out, buf32[ -3 ], coef16[ 3 ] ); - out = silk_SMLAWB( out, buf32[ -4 ], coef16[ 4 ] ); - out = silk_SMLAWB( out, buf32[ -5 ], coef16[ 5 ] ); - out = silk_SMLAWB( out, buf32[ -6 ], coef16[ 6 ] ); - out = silk_SMLAWB( out, buf32[ -7 ], coef16[ 7 ] ); - out = silk_SMLAWB( out, buf32[ -8 ], coef16[ 8 ] ); - out = silk_SMLAWB( out, buf32[ -9 ], coef16[ 9 ] ); - - if( order == 16 ) - { - out = silk_SMLAWB( out, buf32[ -10 ], coef16[ 10 ] ); - out = silk_SMLAWB( out, buf32[ -11 ], coef16[ 11 ] ); - out = silk_SMLAWB( out, buf32[ -12 ], coef16[ 12 ] ); - out = silk_SMLAWB( out, buf32[ -13 ], coef16[ 13 ] ); - out = silk_SMLAWB( out, buf32[ -14 ], coef16[ 14 ] ); - out = silk_SMLAWB( out, buf32[ -15 ], coef16[ 15 ] ); - } - return out; -} - -#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) ((void)arch,silk_noise_shape_quantizer_short_prediction_c(in, coef, order)) - -static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_c(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order) -{ - opus_int32 out; - opus_int32 tmp1, tmp2; - opus_int j; - - tmp2 = data0[0]; - tmp1 = data1[0]; - data1[0] = tmp2; - - out = silk_RSHIFT(order, 1); - out = silk_SMLAWB(out, tmp2, coef[0]); - - for (j = 2; j < order; j += 2) { - tmp2 = data1[j - 1]; - data1[j - 1] = tmp1; - out = silk_SMLAWB(out, tmp1, coef[j - 1]); - tmp1 = data1[j + 0]; - data1[j + 0] = tmp2; - out = silk_SMLAWB(out, tmp2, coef[j]); - } - data1[order - 1] = tmp1; - out = silk_SMLAWB(out, tmp1, coef[order - 1]); - /* Q11 -> Q12 */ - out = silk_LSHIFT32( out, 1 ); - return out; -} - -#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order)) - -#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -#include "arm/NSQ_neon.h" -#endif - -#endif /* SILK_NSQ_H */ diff --git a/thirdparty/opus/silk/NSQ_del_dec.c b/thirdparty/opus/silk/NSQ_del_dec.c deleted file mode 100644 index ab6feeac98..0000000000 --- a/thirdparty/opus/silk/NSQ_del_dec.c +++ /dev/null @@ -1,716 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "NSQ.h" - - -typedef struct { - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; - opus_int32 RandState[ DECISION_DELAY ]; - opus_int32 Q_Q10[ DECISION_DELAY ]; - opus_int32 Xq_Q14[ DECISION_DELAY ]; - opus_int32 Pred_Q15[ DECISION_DELAY ]; - opus_int32 Shape_Q14[ DECISION_DELAY ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_AR_Q14; - opus_int32 Seed; - opus_int32 SeedInit; - opus_int32 RD_Q10; -} NSQ_del_dec_struct; - -typedef struct { - opus_int32 Q_Q10; - opus_int32 RD_Q10; - opus_int32 xq_Q14; - opus_int32 LF_AR_Q14; - opus_int32 sLTP_shp_Q14; - opus_int32 LPC_exc_Q14; -} NSQ_sample_struct; - -typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; - -#if defined(MIPSr1_ASM) -#include "mips/NSQ_del_dec_mipsr1.h" -#endif -static OPUS_INLINE void silk_nsq_del_dec_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -); - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -); - -void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; - opus_int last_smple_idx, smpl_buf_idx, decisionDelay; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - opus_int32 RDmin_Q10, Gain_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - VARDECL( opus_int32, delayedGain_Q10 ); - VARDECL( NSQ_del_dec_struct, psDelDec ); - NSQ_del_dec_struct *psDD; - SAVE_STACK; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - /* Initialize delayed decision states */ - ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); - silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); - for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psDD->Seed = ( k + psIndices->Seed ) & 3; - psDD->SeedInit = psDD->Seed; - psDD->RD_Q10 = 0; - psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; - psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; - silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); - } - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - smpl_buf_idx = 0; /* index of oldest samples */ - - decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); - - /* For voiced frames limit the decision delay to lower than the pitch lag */ - if( psIndices->signalType == TYPE_VOICED ) { - for( k = 0; k < psEncC->nb_subfr; k++ ) { - decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); - } - } else { - if( lag > 0 ) { - decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); - } - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); - /* Set up pointers to start of sub frame */ - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - subfr = 0; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - if( k == 2 ) { - /* RESET DELAYED DECISIONS */ - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { - if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ i ].RD_Q10; - Winner_ind = i; - } - } - for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { - if( i != Winner_ind ) { - psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); - silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - last_smple_idx = smpl_buf_idx + decisionDelay; - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - - subfr = 0; - } - - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - NSQ->rewhite_flag = 1; - } - } - - silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, - psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); - - silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, - delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], - Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, - psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch ); - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ k ].RD_Q10; - Winner_ind = k; - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - psIndices->Seed = psDD->SeedInit; - last_smple_idx = smpl_buf_idx + decisionDelay; - Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); - - /* Update states */ - NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech signal */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; -#ifdef silk_short_prediction_create_arch_coef - opus_int32 a_Q12_arch[MAX_LPC_ORDER]; -#endif - - VARDECL( NSQ_sample_pair, psSampleState ); - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - SAVE_STACK; - - silk_assert( nStatesDelayedDecision > 0 ); - ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - -#ifdef silk_short_prediction_create_arch_coef - silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder); -#endif - - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q14 = 2; - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch); - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } - RESTORE_STACK; -} -#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */ - -static OPUS_INLINE void silk_nsq_del_dec_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -) -{ - opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - NSQ_del_dec_struct *psDD; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - for( i = 0; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - - /* Scale scalar states */ - psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); - } - for( i = 0; i < DECISION_DELAY; i++ ) { - psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); - psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); - } - } - } -} diff --git a/thirdparty/opus/silk/PLC.c b/thirdparty/opus/silk/PLC.c deleted file mode 100644 index fb6ea887b7..0000000000 --- a/thirdparty/opus/silk/PLC.c +++ /dev/null @@ -1,446 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "PLC.h" - -#define NB_ATT 2 -static const opus_int16 HARM_ATT_Q15[NB_ATT] = { 32440, 31130 }; /* 0.99, 0.95 */ -static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT] = { 31130, 26214 }; /* 0.95, 0.8 */ -static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */ - -static OPUS_INLINE void silk_PLC_update( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl /* I/O Decoder control */ -); - -static OPUS_INLINE void silk_PLC_conceal( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* O LPC residual signal */ - int arch /* I Run-time architecture */ -); - - -void silk_PLC_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -) -{ - psDec->sPLC.pitchL_Q8 = silk_LSHIFT( psDec->frame_length, 8 - 1 ); - psDec->sPLC.prevGain_Q16[ 0 ] = SILK_FIX_CONST( 1, 16 ); - psDec->sPLC.prevGain_Q16[ 1 ] = SILK_FIX_CONST( 1, 16 ); - psDec->sPLC.subfr_length = 20; - psDec->sPLC.nb_subfr = 2; -} - -void silk_PLC( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O signal */ - opus_int lost, /* I Loss flag */ - int arch /* I Run-time architecture */ -) -{ - /* PLC control function */ - if( psDec->fs_kHz != psDec->sPLC.fs_kHz ) { - silk_PLC_Reset( psDec ); - psDec->sPLC.fs_kHz = psDec->fs_kHz; - } - - if( lost ) { - /****************************/ - /* Generate Signal */ - /****************************/ - silk_PLC_conceal( psDec, psDecCtrl, frame, arch ); - - psDec->lossCnt++; - } else { - /****************************/ - /* Update state */ - /****************************/ - silk_PLC_update( psDec, psDecCtrl ); - } -} - -/**************************************************/ -/* Update state of PLC */ -/**************************************************/ -static OPUS_INLINE void silk_PLC_update( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl /* I/O Decoder control */ -) -{ - opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14; - opus_int i, j; - silk_PLC_struct *psPLC; - - psPLC = &psDec->sPLC; - - /* Update parameters used in case of packet loss */ - psDec->prevSignalType = psDec->indices.signalType; - LTP_Gain_Q14 = 0; - if( psDec->indices.signalType == TYPE_VOICED ) { - /* Find the parameters for the last subframe which contains a pitch pulse */ - for( j = 0; j * psDec->subfr_length < psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; j++ ) { - if( j == psDec->nb_subfr ) { - break; - } - temp_LTP_Gain_Q14 = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( psDec->nb_subfr - 1 - j ) * LTP_ORDER + i ]; - } - if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) { - LTP_Gain_Q14 = temp_LTP_Gain_Q14; - silk_memcpy( psPLC->LTPCoef_Q14, - &psDecCtrl->LTPCoef_Q14[ silk_SMULBB( psDec->nb_subfr - 1 - j, LTP_ORDER ) ], - LTP_ORDER * sizeof( opus_int16 ) ); - - psPLC->pitchL_Q8 = silk_LSHIFT( psDecCtrl->pitchL[ psDec->nb_subfr - 1 - j ], 8 ); - } - } - - silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); - psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14; - - /* Limit LT coefs */ - if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) { - opus_int scale_Q10; - opus_int32 tmp; - - tmp = silk_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 ); - scale_Q10 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); - for( i = 0; i < LTP_ORDER; i++ ) { - psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 ); - } - } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) { - opus_int scale_Q14; - opus_int32 tmp; - - tmp = silk_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 ); - scale_Q14 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); - for( i = 0; i < LTP_ORDER; i++ ) { - psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 ); - } - } - } else { - psPLC->pitchL_Q8 = silk_LSHIFT( silk_SMULBB( psDec->fs_kHz, 18 ), 8 ); - silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 )); - } - - /* Save LPC coeficients */ - silk_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); - psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14; - - /* Save last two gains */ - silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) ); - - psPLC->subfr_length = psDec->subfr_length; - psPLC->nb_subfr = psDec->nb_subfr; -} - -static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2, - const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr) -{ - int i, k; - VARDECL( opus_int16, exc_buf ); - opus_int16 *exc_buf_ptr; - SAVE_STACK; - ALLOC( exc_buf, 2*subfr_length, opus_int16 ); - /* Find random noise component */ - /* Scale previous excitation signal */ - exc_buf_ptr = exc_buf; - for( k = 0; k < 2; k++ ) { - for( i = 0; i < subfr_length; i++ ) { - exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( - silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) ); - } - exc_buf_ptr += subfr_length; - } - /* Find the subframe with lowest energy of the last two and use that as random noise generator */ - silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length ); - silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length ); - RESTORE_STACK; -} - -static OPUS_INLINE void silk_PLC_conceal( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* O LPC residual signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, j, k; - opus_int lag, idx, sLTP_buf_idx, shift1, shift2; - opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30; - opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; - opus_int32 LPC_pred_Q10, LTP_pred_Q12; - opus_int16 rand_scale_Q14; - opus_int16 *B_Q14; - opus_int32 *sLPC_Q14_ptr; - opus_int16 A_Q12[ MAX_LPC_ORDER ]; -#ifdef SMALL_FOOTPRINT - opus_int16 *sLTP; -#else - VARDECL( opus_int16, sLTP ); -#endif - VARDECL( opus_int32, sLTP_Q14 ); - silk_PLC_struct *psPLC = &psDec->sPLC; - opus_int32 prevGain_Q10[2]; - SAVE_STACK; - - ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); -#ifdef SMALL_FOOTPRINT - /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */ - sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length; -#else - ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); -#endif - - prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); - prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); - - if( psDec->first_frame_after_reset ) { - silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); - } - - silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr); - - if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { - /* First sub-frame has lowest energy */ - rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ]; - } else { - /* Second sub-frame has lowest energy */ - rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ]; - } - - /* Set up Gain to random noise component */ - B_Q14 = psPLC->LTPCoef_Q14; - rand_scale_Q14 = psPLC->randScale_Q14; - - /* Set up attenuation gains */ - harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; - if( psDec->prevSignalType == TYPE_VOICED ) { - rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; - } else { - rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; - } - - /* LPC concealment. Apply BWE to previous LPC */ - silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) ); - - /* Preload LPC coeficients to array on stack. Gives small performance gain */ - silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); - - /* First Lost frame */ - if( psDec->lossCnt == 0 ) { - rand_scale_Q14 = 1 << 14; - - /* Reduce random noise Gain for voiced frames */ - if( psDec->prevSignalType == TYPE_VOICED ) { - for( i = 0; i < LTP_ORDER; i++ ) { - rand_scale_Q14 -= B_Q14[ i ]; - } - rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */ - rand_scale_Q14 = (opus_int16)silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 ); - } else { - /* Reduce random noise for unvoiced frames with high LPC gain */ - opus_int32 invGain_Q30, down_scale_Q30; - - invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order ); - - down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 ); - down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 ); - down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES ); - - rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 ); - } - } - - rand_seed = psPLC->rand_seed; - lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); - sLTP_buf_idx = psDec->ltp_mem_length; - - /* Rewhiten LTP state */ - idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; - silk_assert( idx > 0 ); - silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch ); - /* Scale LTP state */ - inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 ); - inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 ); - for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) { - sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] ); - } - - /***************************/ - /* LTP synthesis filtering */ - /***************************/ - for( k = 0; k < psDec->nb_subfr; k++ ) { - /* Set up pointer */ - pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - for( i = 0; i < psDec->subfr_length; i++ ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q12 = 2; - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); - pred_lag_ptr++; - - /* Generate LPC excitation */ - rand_seed = silk_RAND( rand_seed ); - idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK; - sLTP_Q14[ sLTP_buf_idx ] = silk_LSHIFT32( silk_SMLAWB( LTP_pred_Q12, rand_ptr[ idx ], rand_scale_Q14 ), 2 ); - sLTP_buf_idx++; - } - - /* Gradually reduce LTP gain */ - for( j = 0; j < LTP_ORDER; j++ ) { - B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 ); - } - /* Gradually reduce excitation gain */ - rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); - - /* Slowly increase pitch lag */ - psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 ); - psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) ); - lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); - } - - /***************************/ - /* LPC synthesis filtering */ - /***************************/ - sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ]; - - /* Copy LPC state */ - silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - - silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */ - for( i = 0; i < psDec->frame_length; i++ ) { - /* partly unrolled */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); - for( j = 10; j < psDec->LPC_order; j++ ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] ); - } - - /* Add prediction to LPC excitation */ - sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], - silk_LSHIFT_SAT32( LPC_pred_Q10, 4 )); - - /* Scale with Gain */ - frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) ); - } - - /* Save LPC state */ - silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); - - /**************************************/ - /* Update states */ - /**************************************/ - psPLC->rand_seed = rand_seed; - psPLC->randScale_Q14 = rand_scale_Q14; - for( i = 0; i < MAX_NB_SUBFR; i++ ) { - psDecCtrl->pitchL[ i ] = lag; - } - RESTORE_STACK; -} - -/* Glues concealed frames with new good received frames */ -void silk_PLC_glue_frames( - silk_decoder_state *psDec, /* I/O decoder state */ - opus_int16 frame[], /* I/O signal */ - opus_int length /* I length of signal */ -) -{ - opus_int i, energy_shift; - opus_int32 energy; - silk_PLC_struct *psPLC; - psPLC = &psDec->sPLC; - - if( psDec->lossCnt ) { - /* Calculate energy in concealed residual */ - silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, frame, length ); - - psPLC->last_frame_lost = 1; - } else { - if( psDec->sPLC.last_frame_lost ) { - /* Calculate residual in decoded signal if last frame was lost */ - silk_sum_sqr_shift( &energy, &energy_shift, frame, length ); - - /* Normalize energies */ - if( energy_shift > psPLC->conc_energy_shift ) { - psPLC->conc_energy = silk_RSHIFT( psPLC->conc_energy, energy_shift - psPLC->conc_energy_shift ); - } else if( energy_shift < psPLC->conc_energy_shift ) { - energy = silk_RSHIFT( energy, psPLC->conc_energy_shift - energy_shift ); - } - - /* Fade in the energy difference */ - if( energy > psPLC->conc_energy ) { - opus_int32 frac_Q24, LZ; - opus_int32 gain_Q16, slope_Q16; - - LZ = silk_CLZ32( psPLC->conc_energy ); - LZ = LZ - 1; - psPLC->conc_energy = silk_LSHIFT( psPLC->conc_energy, LZ ); - energy = silk_RSHIFT( energy, silk_max_32( 24 - LZ, 0 ) ); - - frac_Q24 = silk_DIV32( psPLC->conc_energy, silk_max( energy, 1 ) ); - - gain_Q16 = silk_LSHIFT( silk_SQRT_APPROX( frac_Q24 ), 4 ); - slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length ); - /* Make slope 4x steeper to avoid missing onsets after DTX */ - slope_Q16 = silk_LSHIFT( slope_Q16, 2 ); - - for( i = 0; i < length; i++ ) { - frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] ); - gain_Q16 += slope_Q16; - if( gain_Q16 > (opus_int32)1 << 16 ) { - break; - } - } - } - } - psPLC->last_frame_lost = 0; - } -} diff --git a/thirdparty/opus/silk/PLC.h b/thirdparty/opus/silk/PLC.h deleted file mode 100644 index 6438f51633..0000000000 --- a/thirdparty/opus/silk/PLC.h +++ /dev/null @@ -1,62 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_PLC_H -#define SILK_PLC_H - -#include "main.h" - -#define BWE_COEF 0.99 -#define V_PITCH_GAIN_START_MIN_Q14 11469 /* 0.7 in Q14 */ -#define V_PITCH_GAIN_START_MAX_Q14 15565 /* 0.95 in Q14 */ -#define MAX_PITCH_LAG_MS 18 -#define RAND_BUF_SIZE 128 -#define RAND_BUF_MASK ( RAND_BUF_SIZE - 1 ) -#define LOG2_INV_LPC_GAIN_HIGH_THRES 3 /* 2^3 = 8 dB LPC gain */ -#define LOG2_INV_LPC_GAIN_LOW_THRES 8 /* 2^8 = 24 dB LPC gain */ -#define PITCH_DRIFT_FAC_Q16 655 /* 0.01 in Q16 */ - -void silk_PLC_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -); - -void silk_PLC( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O signal */ - opus_int lost, /* I Loss flag */ - int arch /* I Run-time architecture */ -); - -void silk_PLC_glue_frames( - silk_decoder_state *psDec, /* I/O decoder state */ - opus_int16 frame[], /* I/O signal */ - opus_int length /* I length of signal */ -); - -#endif - diff --git a/thirdparty/opus/silk/SigProc_FIX.h b/thirdparty/opus/silk/SigProc_FIX.h deleted file mode 100644 index b63299441e..0000000000 --- a/thirdparty/opus/silk/SigProc_FIX.h +++ /dev/null @@ -1,615 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_H -#define SILK_SIGPROC_FIX_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */ - -#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */ - -#include /* for memset(), memcpy(), memmove() */ -#include "typedef.h" -#include "resampler_structs.h" -#include "macros.h" -#include "cpu_support.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/SigProc_FIX_sse.h" -#endif - -/********************************************************************/ -/* SIGNAL PROCESSING FUNCTIONS */ -/********************************************************************/ - -/*! - * Initialize/reset the resampler state for a given pair of input/output sampling rates -*/ -opus_int silk_resampler_init( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ - opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ - opus_int forEnc /* I If 1: encoder; if 0: decoder */ -); - -/*! - * Resampler: convert from one sampling rate to another - */ -opus_int silk_resampler( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -); - -/*! -* Downsample 2x, mediocre quality -*/ -void silk_resampler_down2( - opus_int32 *S, /* I/O State vector [ 2 ] */ - opus_int16 *out, /* O Output signal [ len ] */ - const opus_int16 *in, /* I Input signal [ floor(len/2) ] */ - opus_int32 inLen /* I Number of input samples */ -); - -/*! - * Downsample by a factor 2/3, low quality -*/ -void silk_resampler_down2_3( - opus_int32 *S, /* I/O State vector [ 6 ] */ - opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ - const opus_int16 *in, /* I Input signal [ inLen ] */ - opus_int32 inLen /* I Number of input samples */ -); - -/*! - * second order ARMA filter; - * slower than biquad() but uses more precise coefficients - * can handle (slowly) varying coefficients - */ -void silk_biquad_alt( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len, /* I signal length (must be even) */ - opus_int stride /* I Operate on interleaved signal if > 1 */ -); - -/* Variable order MA prediction error filter. */ -void silk_LPC_analysis_filter( - opus_int16 *out, /* O Output signal */ - const opus_int16 *in, /* I Input signal */ - const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ - const opus_int32 len, /* I Signal length */ - const opus_int32 d, /* I Filter order */ - int arch /* I Run-time architecture */ -); - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander( - opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ -); - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander_32( - opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ -); - -/* Compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -); - -/* For input in Q24 domain */ -opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ - const opus_int order /* I Prediction order */ -); - -/* Split signal in two decimated bands using first-order allpass filters */ -void silk_ana_filt_bank_1( - const opus_int16 *in, /* I Input signal [N] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *outL, /* O Low band [N/2] */ - opus_int16 *outH, /* O High band [N/2] */ - const opus_int32 N /* I Number of input samples */ -); - -/********************************************************************/ -/* SCALAR FUNCTIONS */ -/********************************************************************/ - -/* Approximation of 128 * log2() (exact inverse of approx 2^() below) */ -/* Convert input to a log scale */ -opus_int32 silk_lin2log( - const opus_int32 inLin /* I input in linear scale */ -); - -/* Approximation of a sigmoid function */ -opus_int silk_sigm_Q15( - opus_int in_Q5 /* I */ -); - -/* Approximation of 2^() (exact inverse of approx log2() above) */ -/* Convert input to a linear scale */ -opus_int32 silk_log2lin( - const opus_int32 inLog_Q7 /* I input on log scale */ -); - -/* Compute number of bits to right shift the sum of squares of a vector */ -/* of int16s to make it fit in an int32 */ -void silk_sum_sqr_shift( - opus_int32 *energy, /* O Energy of x, after shifting to the right */ - opus_int *shift, /* O Number of bits right shift applied to energy */ - const opus_int16 *x, /* I Input vector */ - opus_int len /* I Length of input vector */ -); - -/* Calculates the reflection coefficients from the correlation sequence */ -/* Faster than schur64(), but much less accurate. */ -/* uses SMLAWB(), requiring armv5E and higher. */ -opus_int32 silk_schur( /* O Returns residual energy */ - opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */ - const opus_int32 *c, /* I correlations [order+1] */ - const opus_int32 order /* I prediction order */ -); - -/* Calculates the reflection coefficients from the correlation sequence */ -/* Slower than schur(), but more accurate. */ -/* Uses SMULL(), available on armv4 */ -opus_int32 silk_schur64( /* O returns residual energy */ - opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */ - const opus_int32 c[], /* I Correlations [order+1] */ - opus_int32 order /* I Prediction order */ -); - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */ - const opus_int32 order /* I Prediction order */ -); - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a_Q16( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */ - const opus_int32 order /* I Prediction order */ -); - -/* Apply sine window to signal vector. */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -/* every other sample of window is linearly interpolated, for speed */ -void silk_apply_sine_window( - opus_int16 px_win[], /* O Pointer to windowed signal */ - const opus_int16 px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -); - -/* Compute autocorrelation */ -void silk_autocorr( - opus_int32 *results, /* O Result (length correlationCount) */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *inputData, /* I Input data to correlate */ - const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount, /* I Number of correlation taps to compute */ - int arch /* I Run-time architecture */ -); - -void silk_decode_pitch( - opus_int16 lagIndex, /* I */ - opus_int8 contourIndex, /* O */ - opus_int pitch_lags[], /* O 4 pitch values */ - const opus_int Fs_kHz, /* I sampling frequency (kHz) */ - const opus_int nb_subfr /* I number of sub frames */ -); - -opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O 4 pitch lag values */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ - const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I Sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I number of 5 ms subframes */ - int arch /* I Run-time architecture */ -); - -/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ -/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */ -void silk_A2NLSF( - opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ - opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ - const opus_int d /* I Filter order (must be even) */ -); - -/* compute whitening filter coefficients from normalized line spectral frequencies */ -void silk_NLSF2A( - opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ - const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ - const opus_int d /* I filter order (should be even) */ -); - -void silk_insertion_sort_increasing( - opus_int32 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -); - -void silk_insertion_sort_decreasing_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -); - -void silk_insertion_sort_increasing_all_values_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - const opus_int L /* I Vector length */ -); - -/* NLSF stabilizer, for a single input data vector */ -void silk_NLSF_stabilize( - opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */ - const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */ - const opus_int L /* I Number of NLSF parameters in the input vector */ -); - -/* Laroia low complexity NLSF weights */ -void silk_NLSF_VQ_weights_laroia( - opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */ - const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */ - const opus_int D /* I Input vector dimension (even) */ -); - -/* Compute reflection coefficients from input signal */ -void silk_burg_modified_c( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -); - -/* Copy and multiply a vector by a constant */ -void silk_scale_copy_vector16( - opus_int16 *data_out, - const opus_int16 *data_in, - opus_int32 gain_Q16, /* I Gain in Q16 */ - const opus_int dataSize /* I Length */ -); - -/* Some for the LTP related function requires Q26 to work.*/ -void silk_scale_vector32_Q26_lshift_18( - opus_int32 *data1, /* I/O Q0/Q18 */ - opus_int32 gain_Q26, /* I Q26 */ - opus_int dataSize /* I length */ -); - -/********************************************************************/ -/* INLINE ARM MATH */ -/********************************************************************/ - -/* return sum( inVec1[i] * inVec2[i] ) */ - -opus_int32 silk_inner_prod_aligned( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int len, /* I vector lengths */ - int arch /* I Run-time architecture */ -); - - -opus_int32 silk_inner_prod_aligned_scale( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int scale, /* I number of bits to shift */ - const opus_int len /* I vector lengths */ -); - -opus_int64 silk_inner_prod16_aligned_64_c( - const opus_int16 *inVec1, /* I input vector 1 */ - const opus_int16 *inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ -); - -/********************************************************************/ -/* MACROS */ -/********************************************************************/ - -/* Rotate a32 right by 'rot' bits. Negative rot values result in rotating - left. Output is 32bit int. - Note: contemporary compilers recognize the C expression below and - compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */ -static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot ) -{ - opus_uint32 x = (opus_uint32) a32; - opus_uint32 r = (opus_uint32) rot; - opus_uint32 m = (opus_uint32) -rot; - if( rot == 0 ) { - return a32; - } else if( rot < 0 ) { - return (opus_int32) ((x << m) | (x >> (32 - m))); - } else { - return (opus_int32) ((x << (32 - r)) | (x >> r)); - } -} - -/* Allocate opus_int16 aligned to 4-byte memory address */ -#if EMBEDDED_ARM -#define silk_DWORD_ALIGN __attribute__((aligned(4))) -#else -#define silk_DWORD_ALIGN -#endif - -/* Useful Macros that can be adjusted to other platforms */ -#define silk_memcpy(dest, src, size) memcpy((dest), (src), (size)) -#define silk_memset(dest, src, size) memset((dest), (src), (size)) -#define silk_memmove(dest, src, size) memmove((dest), (src), (size)) - -/* Fixed point macros */ - -/* (a32 * b32) output have to be 32bit int */ -#define silk_MUL(a32, b32) ((a32) * (b32)) - -/* (a32 * b32) output have to be 32bit uint */ -#define silk_MUL_uint(a32, b32) silk_MUL(a32, b32) - -/* a32 + (b32 * c32) output have to be 32bit int */ -#define silk_MLA(a32, b32, c32) silk_ADD32((a32),((b32) * (c32))) - -/* a32 + (b32 * c32) output have to be 32bit uint */ -#define silk_MLA_uint(a32, b32, c32) silk_MLA(a32, b32, c32) - -/* ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */ -#define silk_SMULTT(a32, b32) (((a32) >> 16) * ((b32) >> 16)) - -/* a32 + ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */ -#define silk_SMLATT(a32, b32, c32) silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16)) - -#define silk_SMLALBB(a64, b16, c16) silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16))) - -/* (a32 * b32) */ -#define silk_SMULL(a32, b32) ((opus_int64)(a32) * /*(opus_int64)*/(b32)) - -/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour - (just standard two's complement implementation-specific behaviour) */ -#define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b))) -/* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour - (just standard two's complement implementation-specific behaviour) */ -#define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b))) - -/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ -#define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32)) -#define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))) - -#define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16))) -#define silk_DIV32(a32, b32) ((opus_int32)((a32) / (b32))) - -/* These macros enables checking for overflow in silk_API_Debug.h*/ -#define silk_ADD16(a, b) ((a) + (b)) -#define silk_ADD32(a, b) ((a) + (b)) -#define silk_ADD64(a, b) ((a) + (b)) - -#define silk_SUB16(a, b) ((a) - (b)) -#define silk_SUB32(a, b) ((a) - (b)) -#define silk_SUB64(a, b) ((a) - (b)) - -#define silk_SAT8(a) ((a) > silk_int8_MAX ? silk_int8_MAX : \ - ((a) < silk_int8_MIN ? silk_int8_MIN : (a))) -#define silk_SAT16(a) ((a) > silk_int16_MAX ? silk_int16_MAX : \ - ((a) < silk_int16_MIN ? silk_int16_MIN : (a))) -#define silk_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : \ - ((a) < silk_int32_MIN ? silk_int32_MIN : (a))) - -#define silk_CHECK_FIT8(a) (a) -#define silk_CHECK_FIT16(a) (a) -#define silk_CHECK_FIT32(a) (a) - -#define silk_ADD_SAT16(a, b) (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) ) -#define silk_ADD_SAT64(a, b) ((((a) + (b)) & 0x8000000000000000LL) == 0 ? \ - ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \ - ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) ) - -#define silk_SUB_SAT16(a, b) (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) ) -#define silk_SUB_SAT64(a, b) ((((a)-(b)) & 0x8000000000000000LL) == 0 ? \ - (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \ - ((((a)^0x8000000000000000LL) & (b) & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) ) - -/* Saturation for positive input values */ -#define silk_POS_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : (a)) - -/* Add with saturation for positive input values */ -#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b))) - -#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */ -#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */ -#define silk_LSHIFT32(a, shift) ((opus_int32)((opus_uint32)(a)<<(shift))) /* shift >= 0, shift < 32 */ -#define silk_LSHIFT64(a, shift) ((opus_int64)((opus_uint64)(a)<<(shift))) /* shift >= 0, shift < 64 */ -#define silk_LSHIFT(a, shift) silk_LSHIFT32(a, shift) /* shift >= 0, shift < 32 */ - -#define silk_RSHIFT8(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 8 */ -#define silk_RSHIFT16(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 16 */ -#define silk_RSHIFT32(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 32 */ -#define silk_RSHIFT64(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 64 */ -#define silk_RSHIFT(a, shift) silk_RSHIFT32(a, shift) /* shift >= 0, shift < 32 */ - -/* saturates before shifting */ -#define silk_LSHIFT_SAT32(a, shift) (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \ - silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) )) - -#define silk_LSHIFT_ovflw(a, shift) ((opus_int32)((opus_uint32)(a) << (shift))) /* shift >= 0, allowed to overflow */ -#define silk_LSHIFT_uint(a, shift) ((a) << (shift)) /* shift >= 0 */ -#define silk_RSHIFT_uint(a, shift) ((a) >> (shift)) /* shift >= 0 */ - -#define silk_ADD_LSHIFT(a, b, shift) ((a) + silk_LSHIFT((b), (shift))) /* shift >= 0 */ -#define silk_ADD_LSHIFT32(a, b, shift) silk_ADD32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */ -#define silk_ADD_LSHIFT_uint(a, b, shift) ((a) + silk_LSHIFT_uint((b), (shift))) /* shift >= 0 */ -#define silk_ADD_RSHIFT(a, b, shift) ((a) + silk_RSHIFT((b), (shift))) /* shift >= 0 */ -#define silk_ADD_RSHIFT32(a, b, shift) silk_ADD32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */ -#define silk_ADD_RSHIFT_uint(a, b, shift) ((a) + silk_RSHIFT_uint((b), (shift))) /* shift >= 0 */ -#define silk_SUB_LSHIFT32(a, b, shift) silk_SUB32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */ -#define silk_SUB_RSHIFT32(a, b, shift) silk_SUB32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */ - -/* Requires that shift > 0 */ -#define silk_RSHIFT_ROUND(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1) -#define silk_RSHIFT_ROUND64(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1) - -/* Number of rightshift required to fit the multiplication */ -#define silk_NSHIFT_MUL_32_32(a, b) ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) ) -#define silk_NSHIFT_MUL_16_16(a, b) ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) ) - - -#define silk_min(a, b) (((a) < (b)) ? (a) : (b)) -#define silk_max(a, b) (((a) > (b)) ? (a) : (b)) - -/* Macro to convert floating-point constants to fixed-point */ -#define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5)) - -/* silk_min() versions with typecast in the function call */ -static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) -{ - return (((a) < (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) -{ - return (((a) < (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) -{ - return (((a) < (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) -{ - return (((a) < (b)) ? (a) : (b)); -} - -/* silk_min() versions with typecast in the function call */ -static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) -{ - return (((a) > (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) -{ - return (((a) > (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) -{ - return (((a) > (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) -{ - return (((a) > (b)) ? (a) : (b)); -} - -#define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))) - -#define silk_LIMIT_int silk_LIMIT -#define silk_LIMIT_16 silk_LIMIT -#define silk_LIMIT_32 silk_LIMIT - -#define silk_abs(a) (((a) > 0) ? (a) : -(a)) /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */ -#define silk_abs_int(a) (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1))) -#define silk_abs_int32(a) (((a) ^ ((a) >> 31)) - ((a) >> 31)) -#define silk_abs_int64(a) (((a) > 0) ? (a) : -(a)) - -#define silk_sign(a) ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )) - -/* PSEUDO-RANDOM GENERATOR */ -/* Make sure to store the result as the seed for the next call (also in between */ -/* frames), otherwise result won't be random at all. When only using some of the */ -/* bits, take the most significant bits by right-shifting. */ -#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165)) - -/* Add some multiplication functions that can be easily mapped to ARM. */ - -/* silk_SMMUL: Signed top word multiply. - ARMv6 2 instruction cycles. - ARMv3M+ 3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/ -/*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/ -/* the following seems faster on x86 */ -#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) - -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) - -#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len)) -#endif - -#include "Inlines.h" -#include "MacroCount.h" -#include "MacroDebug.h" - -#ifdef OPUS_ARM_INLINE_ASM -#include "arm/SigProc_FIX_armv4.h" -#endif - -#ifdef OPUS_ARM_INLINE_EDSP -#include "arm/SigProc_FIX_armv5e.h" -#endif - -#if defined(MIPSr1_ASM) -#include "mips/sigproc_fix_mipsr1.h" -#endif - - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_SIGPROC_FIX_H */ diff --git a/thirdparty/opus/silk/VAD.c b/thirdparty/opus/silk/VAD.c deleted file mode 100644 index 0a782af2f1..0000000000 --- a/thirdparty/opus/silk/VAD.c +++ /dev/null @@ -1,362 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/* Silk VAD noise level estimation */ -# if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE void silk_VAD_GetNoiseLevels( - const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -); -#endif - -/**********************************/ -/* Initialization of the Silk VAD */ -/**********************************/ -opus_int silk_VAD_Init( /* O Return value, 0 if success */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -) -{ - opus_int b, ret = 0; - - /* reset state memory */ - silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) ); - - /* init noise levels */ - /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */ - for( b = 0; b < VAD_N_BANDS; b++ ) { - psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, b + 1 ), 1 ); - } - - /* Initialize state */ - for( b = 0; b < VAD_N_BANDS; b++ ) { - psSilk_VAD->NL[ b ] = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] ); - psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] ); - } - psSilk_VAD->counter = 15; - - /* init smoothed energy-to-noise ratio*/ - for( b = 0; b < VAD_N_BANDS; b++ ) { - psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256; /* 100 * 256 --> 20 dB SNR */ - } - - return( ret ); -} - -/* Weighting factors for tilt measure */ -static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 }; - -/***************************************/ -/* Get the speech activity level in Q8 */ -/***************************************/ -opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */ - silk_encoder_state *psEncC, /* I/O Encoder state */ - const opus_int16 pIn[] /* I PCM input */ -) -{ - opus_int SA_Q15, pSNR_dB_Q7, input_tilt; - opus_int decimated_framelength1, decimated_framelength2; - opus_int decimated_framelength; - opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; - opus_int32 sumSquared, smooth_coef_Q16; - opus_int16 HPstateTmp; - VARDECL( opus_int16, X ); - opus_int32 Xnrg[ VAD_N_BANDS ]; - opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; - opus_int32 speech_nrg, x_tmp; - opus_int X_offset[ VAD_N_BANDS ]; - opus_int ret = 0; - silk_VAD_state *psSilk_VAD = &psEncC->sVAD; - SAVE_STACK; - - /* Safety checks */ - silk_assert( VAD_N_BANDS == 4 ); - silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - silk_assert( psEncC->frame_length <= 512 ); - silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); - - /***********************/ - /* Filter and Decimate */ - /***********************/ - decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); - decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); - decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); - /* Decimate into 4 bands: - 0 L 3L L 3L 5L - - -- - -- -- - 8 8 2 4 4 - - [0-1 kHz| temp. |1-2 kHz| 2-4 kHz | 4-8 kHz | - - They're arranged to allow the minimal ( frame_length / 4 ) extra - scratch space during the downsampling process */ - X_offset[ 0 ] = 0; - X_offset[ 1 ] = decimated_framelength + decimated_framelength2; - X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; - X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; - ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); - - /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ - silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], - X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); - - /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], - X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); - - /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], - X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); - - /*********************************************/ - /* HP filter on lowest band (differentiator) */ - /*********************************************/ - X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); - HPstateTmp = X[ decimated_framelength - 1 ]; - for( i = decimated_framelength - 1; i > 0; i-- ) { - X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); - X[ i ] -= X[ i - 1 ]; - } - X[ 0 ] -= psSilk_VAD->HPstate; - psSilk_VAD->HPstate = HPstateTmp; - - /*************************************/ - /* Calculate the energy in each band */ - /*************************************/ - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Find the decimated framelength in the non-uniformly divided bands */ - decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); - - /* Split length into subframe lengths */ - dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); - dec_subframe_offset = 0; - - /* Compute energy per sub-frame */ - /* initialize with summed energy of last subframe */ - Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; - for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { - sumSquared = 0; - for( i = 0; i < dec_subframe_length; i++ ) { - /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ - /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ - x_tmp = silk_RSHIFT( - X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); - sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); - - /* Safety check */ - silk_assert( sumSquared >= 0 ); - } - - /* Add/saturate summed energy of current subframe */ - if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); - } else { - /* Look-ahead subframe */ - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); - } - - dec_subframe_offset += dec_subframe_length; - } - psSilk_VAD->XnrgSubfr[ b ] = sumSquared; - } - - /********************/ - /* Noise estimation */ - /********************/ - silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); - - /***********************************************/ - /* Signal-plus-noise to noise ratio estimation */ - /***********************************************/ - sumSquared = 0; - input_tilt = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; - if( speech_nrg > 0 ) { - /* Divide, with sufficient resolution */ - if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); - } else { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); - } - - /* Convert to log domain */ - SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; - - /* Sum-of-squares */ - sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ - - /* Tilt measure */ - if( speech_nrg < ( (opus_int32)1 << 20 ) ) { - /* Scale down SNR value for small subband speech energies */ - SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); - } - input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); - } else { - NrgToNoiseRatio_Q8[ b ] = 256; - } - } - - /* Mean-of-squares */ - sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ - - /* Root-mean-square approximation, scale to dBs, and write to output pointer */ - pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ - - /*********************************/ - /* Speech Probability Estimation */ - /*********************************/ - SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); - - /**************************/ - /* Frequency Tilt Measure */ - /**************************/ - psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); - - /**************************************************/ - /* Scale the sigmoid output based on power levels */ - /**************************************************/ - speech_nrg = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ - speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); - } - - /* Power scaling */ - if( speech_nrg <= 0 ) { - SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 32768 ) { - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); - } else { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); - } - - /* square-root */ - speech_nrg = silk_SQRT_APPROX( speech_nrg ); - SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); - } - - /* Copy the resulting speech activity in Q8 */ - psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); - - /***********************************/ - /* Energy Level and SNR estimation */ - /***********************************/ - /* Smoothing coefficient */ - smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); - - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - smooth_coef_Q16 >>= 1; - } - - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* compute smoothed energy-to-noise ratio per band */ - psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ], - NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 ); - - /* signal to noise ratio in dB per band */ - SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 ); - /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */ - psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); - } - - RESTORE_STACK; - return( ret ); -} - -/**************************/ -/* Noise level estimation */ -/**************************/ -# if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE -#endif -void silk_VAD_GetNoiseLevels( - const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -) -{ - opus_int k; - opus_int32 nl, nrg, inv_nrg; - opus_int coef, min_coef; - - /* Initially faster smoothing */ - if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */ - min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 ); - } else { - min_coef = 0; - } - - for( k = 0; k < VAD_N_BANDS; k++ ) { - /* Get old noise level estimate for current band */ - nl = psSilk_VAD->NL[ k ]; - silk_assert( nl >= 0 ); - - /* Add bias */ - nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] ); - silk_assert( nrg > 0 ); - - /* Invert energies */ - inv_nrg = silk_DIV32( silk_int32_MAX, nrg ); - silk_assert( inv_nrg >= 0 ); - - /* Less update when subband energy is high */ - if( nrg > silk_LSHIFT( nl, 3 ) ) { - coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3; - } else if( nrg < nl ) { - coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16; - } else { - coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1 ); - } - - /* Initially faster smoothing */ - coef = silk_max_int( coef, min_coef ); - - /* Smooth inverse energies */ - psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef ); - silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 ); - - /* Compute noise level by inverting again */ - nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] ); - silk_assert( nl >= 0 ); - - /* Limit noise levels (guarantee 7 bits of head room) */ - nl = silk_min( nl, 0x00FFFFFF ); - - /* Store as part of state */ - psSilk_VAD->NL[ k ] = nl; - } - - /* Increment frame counter */ - psSilk_VAD->counter++; -} diff --git a/thirdparty/opus/silk/VQ_WMat_EC.c b/thirdparty/opus/silk/VQ_WMat_EC.c deleted file mode 100644 index 7983f1db80..0000000000 --- a/thirdparty/opus/silk/VQ_WMat_EC.c +++ /dev/null @@ -1,120 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC_c( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -) -{ - opus_int k, gain_tmp_Q7; - const opus_int8 *cb_row_Q7; - opus_int16 diff_Q14[ 5 ]; - opus_int32 sum1_Q14, sum2_Q16; - - /* Loop over codebook */ - *rate_dist_Q14 = silk_int32_MAX; - cb_row_Q7 = cb_Q7; - for( k = 0; k < L; k++ ) { - gain_tmp_Q7 = cb_gain_Q7[k]; - - diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); - diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); - diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 ); - diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 ); - diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 ); - - /* Weighted rate */ - sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); - - /* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); - - silk_assert( sum1_Q14 >= 0 ); - - /* first row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); - - /* second row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); - - /* third row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); - - /* fourth row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); - - /* last row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); - - silk_assert( sum1_Q14 >= 0 ); - - /* find best */ - if( sum1_Q14 < *rate_dist_Q14 ) { - *rate_dist_Q14 = sum1_Q14; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; - } - - /* Go to next cbk vector */ - cb_row_Q7 += LTP_ORDER; - } -} diff --git a/thirdparty/opus/silk/ana_filt_bank_1.c b/thirdparty/opus/silk/ana_filt_bank_1.c deleted file mode 100644 index 24cfb03fdb..0000000000 --- a/thirdparty/opus/silk/ana_filt_bank_1.c +++ /dev/null @@ -1,74 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Coefficients for 2-band filter bank based on first-order allpass filters */ -static opus_int16 A_fb1_20 = 5394 << 1; -static opus_int16 A_fb1_21 = -24290; /* (opus_int16)(20623 << 1) */ - -/* Split signal into two decimated bands using first-order allpass filters */ -void silk_ana_filt_bank_1( - const opus_int16 *in, /* I Input signal [N] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *outL, /* O Low band [N/2] */ - opus_int16 *outH, /* O High band [N/2] */ - const opus_int32 N /* I Number of input samples */ -) -{ - opus_int k, N2 = silk_RSHIFT( N, 1 ); - opus_int32 in32, X, Y, out_1, out_2; - - /* Internal variables and state are in Q10 format */ - for( k = 0; k < N2; k++ ) { - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 ); - - /* All-pass section for even input sample */ - Y = silk_SUB32( in32, S[ 0 ] ); - X = silk_SMLAWB( Y, Y, A_fb1_21 ); - out_1 = silk_ADD32( S[ 0 ], X ); - S[ 0 ] = silk_ADD32( in32, X ); - - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 ); - - /* All-pass section for odd input sample, and add to output of previous section */ - Y = silk_SUB32( in32, S[ 1 ] ); - X = silk_SMULWB( Y, A_fb1_20 ); - out_2 = silk_ADD32( S[ 1 ], X ); - S[ 1 ] = silk_ADD32( in32, X ); - - /* Add/subtract, convert back to int16 and store to output */ - outL[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( out_2, out_1 ), 11 ) ); - outH[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( out_2, out_1 ), 11 ) ); - } -} diff --git a/thirdparty/opus/silk/arm/NSQ_neon.c b/thirdparty/opus/silk/arm/NSQ_neon.c deleted file mode 100644 index 9642529973..0000000000 --- a/thirdparty/opus/silk/arm/NSQ_neon.c +++ /dev/null @@ -1,112 +0,0 @@ -/*********************************************************************** -Copyright (C) 2014 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "main.h" -#include "stack_alloc.h" -#include "NSQ.h" -#include "celt/cpu_support.h" -#include "celt/arm/armcpu.h" - -opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order) -{ - int32x4_t coef0 = vld1q_s32(coef32); - int32x4_t coef1 = vld1q_s32(coef32 + 4); - int32x4_t coef2 = vld1q_s32(coef32 + 8); - int32x4_t coef3 = vld1q_s32(coef32 + 12); - - int32x4_t a0 = vld1q_s32(buf32 - 15); - int32x4_t a1 = vld1q_s32(buf32 - 11); - int32x4_t a2 = vld1q_s32(buf32 - 7); - int32x4_t a3 = vld1q_s32(buf32 - 3); - - int32x4_t b0 = vqdmulhq_s32(coef0, a0); - int32x4_t b1 = vqdmulhq_s32(coef1, a1); - int32x4_t b2 = vqdmulhq_s32(coef2, a2); - int32x4_t b3 = vqdmulhq_s32(coef3, a3); - - int32x4_t c0 = vaddq_s32(b0, b1); - int32x4_t c1 = vaddq_s32(b2, b3); - - int32x4_t d = vaddq_s32(c0, c1); - - int64x2_t e = vpaddlq_s32(d); - - int64x1_t f = vadd_s64(vget_low_s64(e), vget_high_s64(e)); - - opus_int32 out = vget_lane_s32(vreinterpret_s32_s64(f), 0); - - out += silk_RSHIFT( order, 1 ); - - return out; -} - - -opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order) -{ - opus_int32 out; - if (order == 8) - { - int32x4_t a00 = vdupq_n_s32(data0[0]); - int32x4_t a01 = vld1q_s32(data1); /* data1[0] ... [3] */ - - int32x4_t a0 = vextq_s32 (a00, a01, 3); /* data0[0] data1[0] ...[2] */ - int32x4_t a1 = vld1q_s32(data1 + 3); /* data1[3] ... [6] */ - - /*TODO: Convert these once in advance instead of once per sample, like - silk_noise_shape_quantizer_short_prediction_neon() does.*/ - int16x8_t coef16 = vld1q_s16(coef); - int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16)); - int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16)); - - /*This is not bit-exact with the C version, since we do not drop the - lower 16 bits of each multiply, but wait until the end to truncate - precision. This is an encoder-specific calculation (and unlike - silk_noise_shape_quantizer_short_prediction_neon(), is not meant to - simulate what the decoder will do). We still could use vqdmulhq_s32() - like silk_noise_shape_quantizer_short_prediction_neon() and save - half the multiplies, but the speed difference is not large, since we - then need two extra adds.*/ - int64x2_t b0 = vmull_s32(vget_low_s32(a0), vget_low_s32(coef0)); - int64x2_t b1 = vmlal_s32(b0, vget_high_s32(a0), vget_high_s32(coef0)); - int64x2_t b2 = vmlal_s32(b1, vget_low_s32(a1), vget_low_s32(coef1)); - int64x2_t b3 = vmlal_s32(b2, vget_high_s32(a1), vget_high_s32(coef1)); - - int64x1_t c = vadd_s64(vget_low_s64(b3), vget_high_s64(b3)); - int64x1_t cS = vrshr_n_s64(c, 15); - int32x2_t d = vreinterpret_s32_s64(cS); - - out = vget_lane_s32(d, 0); - vst1q_s32(data1, a0); - vst1q_s32(data1 + 4, a1); - return out; - } - return silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order); -} diff --git a/thirdparty/opus/silk/arm/NSQ_neon.h b/thirdparty/opus/silk/arm/NSQ_neon.h deleted file mode 100644 index 77c946af85..0000000000 --- a/thirdparty/opus/silk/arm/NSQ_neon.h +++ /dev/null @@ -1,113 +0,0 @@ -/*********************************************************************** -Copyright (C) 2014 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef SILK_NSQ_NEON_H -#define SILK_NSQ_NEON_H - -#include "cpu_support.h" - -#undef silk_short_prediction_create_arch_coef -/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */ -static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order) -{ - out[15] = in[0] << 15; - out[14] = in[1] << 15; - out[13] = in[2] << 15; - out[12] = in[3] << 15; - out[11] = in[4] << 15; - out[10] = in[5] << 15; - out[9] = in[6] << 15; - out[8] = in[7] << 15; - out[7] = in[8] << 15; - out[6] = in[9] << 15; - - if (order == 16) - { - out[5] = in[10] << 15; - out[4] = in[11] << 15; - out[3] = in[12] << 15; - out[2] = in[13] << 15; - out[1] = in[14] << 15; - out[0] = in[15] << 15; - } - else - { - out[5] = 0; - out[4] = 0; - out[3] = 0; - out[2] = 0; - out[1] = 0; - out[0] = 0; - } -} - -#if defined(OPUS_ARM_PRESUME_NEON_INTR) - -#define silk_short_prediction_create_arch_coef(out, in, order) \ - (silk_short_prediction_create_arch_coef_neon(out, in, order)) - -#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - -#define silk_short_prediction_create_arch_coef(out, in, order) \ - do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0) - -#endif - -opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order); - -opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order); - -#if defined(OPUS_ARM_PRESUME_NEON_INTR) -#undef silk_noise_shape_quantizer_short_prediction -#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \ - ((void)arch,silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order)) - -#undef silk_NSQ_noise_shape_feedback_loop -#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_neon(data0, data1, coef, order)) - -#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - -/* silk_noise_shape_quantizer_short_prediction implementations take different parameters based on arch - (coef vs. coefRev) so can't use the usual IMPL table implementation */ -#undef silk_noise_shape_quantizer_short_prediction -#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \ - (arch == OPUS_ARCH_ARM_NEON ? \ - silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \ - silk_noise_shape_quantizer_short_prediction_c(in, coef, order)) - -extern opus_int32 - (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])( - const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, - opus_int order); - -#undef silk_NSQ_noise_shape_feedback_loop -#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) \ - (SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[(arch)&OPUS_ARCHMASK](data0, data1, \ - coef, order)) - -#endif - -#endif /* SILK_NSQ_NEON_H */ diff --git a/thirdparty/opus/silk/arm/SigProc_FIX_armv4.h b/thirdparty/opus/silk/arm/SigProc_FIX_armv4.h deleted file mode 100644 index ff62b1e5d6..0000000000 --- a/thirdparty/opus/silk/arm/SigProc_FIX_armv4.h +++ /dev/null @@ -1,47 +0,0 @@ -/*********************************************************************** -Copyright (C) 2013 Xiph.Org Foundation and contributors -Copyright (c) 2013 Parrot -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_ARMv4_H -#define SILK_SIGPROC_FIX_ARMv4_H - -#undef silk_MLA -static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - opus_int32 res; - __asm__( - "#silk_MLA\n\t" - "mla %0, %1, %2, %3\n\t" - : "=&r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c)) - -#endif diff --git a/thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h b/thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h deleted file mode 100644 index 617a09cab1..0000000000 --- a/thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h +++ /dev/null @@ -1,61 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Copyright (c) 2013 Parrot -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_ARMv5E_H -#define SILK_SIGPROC_FIX_ARMv5E_H - -#undef silk_SMULTT -static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b) -{ - opus_int32 res; - __asm__( - "#silk_SMULTT\n\t" - "smultt %0, %1, %2\n\t" - : "=r"(res) - : "%r"(a), "r"(b) - ); - return res; -} -#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b)) - -#undef silk_SMLATT -static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - opus_int32 res; - __asm__( - "#silk_SMLATT\n\t" - "smlatt %0, %1, %2, %3\n\t" - : "=r"(res) - : "%r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c)) - -#endif diff --git a/thirdparty/opus/silk/arm/arm_silk_map.c b/thirdparty/opus/silk/arm/arm_silk_map.c deleted file mode 100644 index 9bd86a7b21..0000000000 --- a/thirdparty/opus/silk/arm/arm_silk_map.c +++ /dev/null @@ -1,55 +0,0 @@ -/*********************************************************************** -Copyright (C) 2014 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "NSQ.h" - -#if defined(OPUS_HAVE_RTCD) - -# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \ - !defined(OPUS_ARM_PRESUME_NEON_INTR)) - -/*There is no table for silk_noise_shape_quantizer_short_prediction because the - NEON version takes different parameters than the C version. - Instead RTCD is done via if statements at the call sites. - See NSQ_neon.h for details.*/ - -opus_int32 - (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])( - const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, - opus_int order) = { - silk_NSQ_noise_shape_feedback_loop_c, /* ARMv4 */ - silk_NSQ_noise_shape_feedback_loop_c, /* EDSP */ - silk_NSQ_noise_shape_feedback_loop_c, /* Media */ - silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */ -}; - -# endif - -#endif /* OPUS_HAVE_RTCD */ diff --git a/thirdparty/opus/silk/arm/macros_arm64.h b/thirdparty/opus/silk/arm/macros_arm64.h deleted file mode 100644 index ed030413c5..0000000000 --- a/thirdparty/opus/silk/arm/macros_arm64.h +++ /dev/null @@ -1,39 +0,0 @@ -/*********************************************************************** -Copyright (C) 2015 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_ARM64_H -#define SILK_MACROS_ARM64_H - -#include - -#undef silk_ADD_SAT32 -#define silk_ADD_SAT32(a, b) (vqadds_s32((a), (b))) - -#undef silk_SUB_SAT32 -#define silk_SUB_SAT32(a, b) (vqsubs_s32((a), (b))) - -#endif /* SILK_MACROS_ARM64_H */ diff --git a/thirdparty/opus/silk/arm/macros_armv4.h b/thirdparty/opus/silk/arm/macros_armv4.h deleted file mode 100644 index 3f30e97288..0000000000 --- a/thirdparty/opus/silk/arm/macros_armv4.h +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (C) 2013 Xiph.Org Foundation and contributors. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_ARMv4_H -#define SILK_MACROS_ARMv4_H - -/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ -#undef silk_SMULWB -static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMULWB\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(b<<16) - ); - return rd_hi; -} -#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b)) - -/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ -#undef silk_SMLAWB -#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c)) - -/* (a32 * (b32 >> 16)) >> 16 */ -#undef silk_SMULWT -static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMULWT\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(b&~0xFFFF) - ); - return rd_hi; -} -#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b)) - -/* a32 + (b32 * (c32 >> 16)) >> 16 */ -#undef silk_SMLAWT -#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c)) - -/* (a32 * b32) >> 16 */ -#undef silk_SMULWW -static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMULWW\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(b) - ); - return (rd_hi<<16)+(rd_lo>>16); -} -#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) - -#undef silk_SMLAWW -static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMLAWW\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b), "r"(c) - ); - return a+(rd_hi<<16)+(rd_lo>>16); -} -#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c)) - -#endif /* SILK_MACROS_ARMv4_H */ diff --git a/thirdparty/opus/silk/arm/macros_armv5e.h b/thirdparty/opus/silk/arm/macros_armv5e.h deleted file mode 100644 index aad4117e46..0000000000 --- a/thirdparty/opus/silk/arm/macros_armv5e.h +++ /dev/null @@ -1,213 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Copyright (c) 2013 Parrot -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_ARMv5E_H -#define SILK_MACROS_ARMv5E_H - -/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ -#undef silk_SMULWB -static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) -{ - int res; - __asm__( - "#silk_SMULWB\n\t" - "smulwb %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b)) - -/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ -#undef silk_SMLAWB -static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b, - opus_int16 c) -{ - int res; - __asm__( - "#silk_SMLAWB\n\t" - "smlawb %0, %1, %2, %3\n\t" - : "=r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c)) - -/* (a32 * (b32 >> 16)) >> 16 */ -#undef silk_SMULWT -static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SMULWT\n\t" - "smulwt %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b)) - -/* a32 + (b32 * (c32 >> 16)) >> 16 */ -#undef silk_SMLAWT -static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - int res; - __asm__( - "#silk_SMLAWT\n\t" - "smlawt %0, %1, %2, %3\n\t" - : "=r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c)) - -/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ -#undef silk_SMULBB -static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SMULBB\n\t" - "smulbb %0, %1, %2\n\t" - : "=r"(res) - : "%r"(a), "r"(b) - ); - return res; -} -#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b)) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ -#undef silk_SMLABB -static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - int res; - __asm__( - "#silk_SMLABB\n\t" - "smlabb %0, %1, %2, %3\n\t" - : "=r"(res) - : "%r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c)) - -/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ -#undef silk_SMULBT -static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SMULBT\n\t" - "smulbt %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b)) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ -#undef silk_SMLABT -static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - int res; - __asm__( - "#silk_SMLABT\n\t" - "smlabt %0, %1, %2, %3\n\t" - : "=r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c)) - -/* add/subtract with output saturated */ -#undef silk_ADD_SAT32 -static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_ADD_SAT32\n\t" - "qadd %0, %1, %2\n\t" - : "=r"(res) - : "%r"(a), "r"(b) - ); - return res; -} -#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b)) - -#undef silk_SUB_SAT32 -static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SUB_SAT32\n\t" - "qsub %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b)) - -#undef silk_CLZ16 -static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16) -{ - int res; - __asm__( - "#silk_CLZ16\n\t" - "clz %0, %1;\n" - : "=r"(res) - : "r"(in16<<16|0x8000) - ); - return res; -} -#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16)) - -#undef silk_CLZ32 -static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32) -{ - int res; - __asm__( - "#silk_CLZ32\n\t" - "clz %0, %1\n\t" - : "=r"(res) - : "r"(in32) - ); - return res; -} -#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32)) - -#endif /* SILK_MACROS_ARMv5E_H */ diff --git a/thirdparty/opus/silk/biquad_alt.c b/thirdparty/opus/silk/biquad_alt.c deleted file mode 100644 index d55f5ee92e..0000000000 --- a/thirdparty/opus/silk/biquad_alt.c +++ /dev/null @@ -1,78 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -/* * - * silk_biquad_alt.c * - * * - * Second order ARMA filter * - * Can handle slowly varying filter coefficients * - * */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Second order ARMA filter, alternative implementation */ -void silk_biquad_alt( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len, /* I signal length (must be even) */ - opus_int stride /* I Operate on interleaved signal if > 1 */ -) -{ - /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ - opus_int k; - opus_int32 inval, A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14; - - /* Negate A_Q28 values and split in two parts */ - A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */ - A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */ - A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */ - A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */ - - for( k = 0; k < len; k++ ) { - /* S[ 0 ], S[ 1 ]: Q12 */ - inval = in[ k * stride ]; - out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 ); - - S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 ); - S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14, A0_U_Q28 ); - S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], inval); - - S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A1_L_Q28 ), 14 ); - S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14, A1_U_Q28 ); - S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval ); - - /* Scale back to Q0 and saturate */ - out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); - } -} diff --git a/thirdparty/opus/silk/bwexpander.c b/thirdparty/opus/silk/bwexpander.c deleted file mode 100644 index 2eb4456695..0000000000 --- a/thirdparty/opus/silk/bwexpander.c +++ /dev/null @@ -1,51 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander( - opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ -) -{ - opus_int i; - opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536; - - /* NB: Dont use silk_SMULWB, instead of silk_RSHIFT_ROUND( silk_MUL(), 16 ), below. */ - /* Bias in silk_SMULWB can lead to unstable filters */ - for( i = 0; i < d - 1; i++ ) { - ar[ i ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ] ), 16 ); - chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); - } - ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 ); -} diff --git a/thirdparty/opus/silk/bwexpander_32.c b/thirdparty/opus/silk/bwexpander_32.c deleted file mode 100644 index d0010f73df..0000000000 --- a/thirdparty/opus/silk/bwexpander_32.c +++ /dev/null @@ -1,50 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander_32( - opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ -) -{ - opus_int i; - opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536; - - for( i = 0; i < d - 1; i++ ) { - ar[ i ] = silk_SMULWW( chirp_Q16, ar[ i ] ); - chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); - } - ar[ d - 1 ] = silk_SMULWW( chirp_Q16, ar[ d - 1 ] ); -} - diff --git a/thirdparty/opus/silk/check_control_input.c b/thirdparty/opus/silk/check_control_input.c deleted file mode 100644 index b5de9ce48d..0000000000 --- a/thirdparty/opus/silk/check_control_input.c +++ /dev/null @@ -1,106 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "control.h" -#include "errors.h" - -/* Check encoder control struct */ -opus_int check_control_input( - silk_EncControlStruct *encControl /* I Control structure */ -) -{ - silk_assert( encControl != NULL ); - - if( ( ( encControl->API_sampleRate != 8000 ) && - ( encControl->API_sampleRate != 12000 ) && - ( encControl->API_sampleRate != 16000 ) && - ( encControl->API_sampleRate != 24000 ) && - ( encControl->API_sampleRate != 32000 ) && - ( encControl->API_sampleRate != 44100 ) && - ( encControl->API_sampleRate != 48000 ) ) || - ( ( encControl->desiredInternalSampleRate != 8000 ) && - ( encControl->desiredInternalSampleRate != 12000 ) && - ( encControl->desiredInternalSampleRate != 16000 ) ) || - ( ( encControl->maxInternalSampleRate != 8000 ) && - ( encControl->maxInternalSampleRate != 12000 ) && - ( encControl->maxInternalSampleRate != 16000 ) ) || - ( ( encControl->minInternalSampleRate != 8000 ) && - ( encControl->minInternalSampleRate != 12000 ) && - ( encControl->minInternalSampleRate != 16000 ) ) || - ( encControl->minInternalSampleRate > encControl->desiredInternalSampleRate ) || - ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) || - ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) { - silk_assert( 0 ); - return SILK_ENC_FS_NOT_SUPPORTED; - } - if( encControl->payloadSize_ms != 10 && - encControl->payloadSize_ms != 20 && - encControl->payloadSize_ms != 40 && - encControl->payloadSize_ms != 60 ) { - silk_assert( 0 ); - return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; - } - if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_LOSS_RATE; - } - if( encControl->useDTX < 0 || encControl->useDTX > 1 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_DTX_SETTING; - } - if( encControl->useCBR < 0 || encControl->useCBR > 1 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_CBR_SETTING; - } - if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_INBAND_FEC_SETTING; - } - if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; - } - if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; - } - if( encControl->nChannelsInternal > encControl->nChannelsAPI ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; - } - if( encControl->complexity < 0 || encControl->complexity > 10 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_COMPLEXITY_SETTING; - } - - return SILK_NO_ERROR; -} diff --git a/thirdparty/opus/silk/code_signs.c b/thirdparty/opus/silk/code_signs.c deleted file mode 100644 index dfd1dca9a1..0000000000 --- a/thirdparty/opus/silk/code_signs.c +++ /dev/null @@ -1,115 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/*#define silk_enc_map(a) ((a) > 0 ? 1 : 0)*/ -/*#define silk_dec_map(a) ((a) > 0 ? 1 : -1)*/ -/* shifting avoids if-statement */ -#define silk_enc_map(a) ( silk_RSHIFT( (a), 15 ) + 1 ) -#define silk_dec_map(a) ( silk_LSHIFT( (a), 1 ) - 1 ) - -/* Encodes signs of excitation */ -void silk_encode_signs( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - const opus_int8 pulses[], /* I pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -) -{ - opus_int i, j, p; - opus_uint8 icdf[ 2 ]; - const opus_int8 *q_ptr; - const opus_uint8 *icdf_ptr; - - icdf[ 1 ] = 0; - q_ptr = pulses; - i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); - icdf_ptr = &silk_sign_iCDF[ i ]; - length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); - for( i = 0; i < length; i++ ) { - p = sum_pulses[ i ]; - if( p > 0 ) { - icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; - for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) { - if( q_ptr[ j ] != 0 ) { - ec_enc_icdf( psRangeEnc, silk_enc_map( q_ptr[ j ]), icdf, 8 ); - } - } - } - q_ptr += SHELL_CODEC_FRAME_LENGTH; - } -} - -/* Decodes signs of excitation */ -void silk_decode_signs( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* I/O pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -) -{ - opus_int i, j, p; - opus_uint8 icdf[ 2 ]; - opus_int16 *q_ptr; - const opus_uint8 *icdf_ptr; - - icdf[ 1 ] = 0; - q_ptr = pulses; - i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); - icdf_ptr = &silk_sign_iCDF[ i ]; - length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); - for( i = 0; i < length; i++ ) { - p = sum_pulses[ i ]; - if( p > 0 ) { - icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; - for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) { - if( q_ptr[ j ] > 0 ) { - /* attach sign */ -#if 0 - /* conditional implementation */ - if( ec_dec_icdf( psRangeDec, icdf, 8 ) == 0 ) { - q_ptr[ j ] = -q_ptr[ j ]; - } -#else - /* implementation with shift, subtraction, multiplication */ - q_ptr[ j ] *= silk_dec_map( ec_dec_icdf( psRangeDec, icdf, 8 ) ); -#endif - } - } - } - q_ptr += SHELL_CODEC_FRAME_LENGTH; - } -} diff --git a/thirdparty/opus/silk/control.h b/thirdparty/opus/silk/control.h deleted file mode 100644 index 747e5426a0..0000000000 --- a/thirdparty/opus/silk/control.h +++ /dev/null @@ -1,142 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_CONTROL_H -#define SILK_CONTROL_H - -#include "typedef.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Decoder API flags */ -#define FLAG_DECODE_NORMAL 0 -#define FLAG_PACKET_LOST 1 -#define FLAG_DECODE_LBRR 2 - -/***********************************************/ -/* Structure for controlling encoder operation */ -/***********************************************/ -typedef struct { - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsAPI; - - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsInternal; - - /* I: Input signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */ - opus_int32 API_sampleRate; - - /* I: Maximum internal sampling rate in Hertz; 8000/12000/16000 */ - opus_int32 maxInternalSampleRate; - - /* I: Minimum internal sampling rate in Hertz; 8000/12000/16000 */ - opus_int32 minInternalSampleRate; - - /* I: Soft request for internal sampling rate in Hertz; 8000/12000/16000 */ - opus_int32 desiredInternalSampleRate; - - /* I: Number of samples per packet in milliseconds; 10/20/40/60 */ - opus_int payloadSize_ms; - - /* I: Bitrate during active speech in bits/second; internally limited */ - opus_int32 bitRate; - - /* I: Uplink packet loss in percent (0-100) */ - opus_int packetLossPercentage; - - /* I: Complexity mode; 0 is lowest, 10 is highest complexity */ - opus_int complexity; - - /* I: Flag to enable in-band Forward Error Correction (FEC); 0/1 */ - opus_int useInBandFEC; - - /* I: Flag to enable discontinuous transmission (DTX); 0/1 */ - opus_int useDTX; - - /* I: Flag to use constant bitrate */ - opus_int useCBR; - - /* I: Maximum number of bits allowed for the frame */ - opus_int maxBits; - - /* I: Causes a smooth downmix to mono */ - opus_int toMono; - - /* I: Opus encoder is allowing us to switch bandwidth */ - opus_int opusCanSwitch; - - /* I: Make frames as independent as possible (but still use LPC) */ - opus_int reducedDependency; - - /* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */ - opus_int32 internalSampleRate; - - /* O: Flag that bandwidth switching is allowed (because low voice activity) */ - opus_int allowBandwidthSwitch; - - /* O: Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */ - opus_int inWBmodeWithoutVariableLP; - - /* O: Stereo width */ - opus_int stereoWidth_Q14; - - /* O: Tells the Opus encoder we're ready to switch */ - opus_int switchReady; - -} silk_EncControlStruct; - -/**************************************************************************/ -/* Structure for controlling decoder operation and reading decoder status */ -/**************************************************************************/ -typedef struct { - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsAPI; - - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsInternal; - - /* I: Output signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */ - opus_int32 API_sampleRate; - - /* I: Internal sampling rate used, in Hertz; 8000/12000/16000 */ - opus_int32 internalSampleRate; - - /* I: Number of samples per packet in milliseconds; 10/20/40/60 */ - opus_int payloadSize_ms; - - /* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */ - opus_int prevPitchLag; -} silk_DecControlStruct; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/control_SNR.c b/thirdparty/opus/silk/control_SNR.c deleted file mode 100644 index cee87eb0d8..0000000000 --- a/thirdparty/opus/silk/control_SNR.c +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "tuning_parameters.h" - -/* Control SNR of redidual quantizer */ -opus_int silk_control_SNR( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ -) -{ - opus_int k, ret = SILK_NO_ERROR; - opus_int32 frac_Q6; - const opus_int32 *rateTable; - - /* Set bitrate/coding quality */ - TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS ); - if( TargetRate_bps != psEncC->TargetRate_bps ) { - psEncC->TargetRate_bps = TargetRate_bps; - - /* If new TargetRate_bps, translate to SNR_dB value */ - if( psEncC->fs_kHz == 8 ) { - rateTable = silk_TargetRate_table_NB; - } else if( psEncC->fs_kHz == 12 ) { - rateTable = silk_TargetRate_table_MB; - } else { - rateTable = silk_TargetRate_table_WB; - } - - /* Reduce bitrate for 10 ms modes in these calculations */ - if( psEncC->nb_subfr == 2 ) { - TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS; - } - - /* Find bitrate interval in table and interpolate */ - for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) { - if( TargetRate_bps <= rateTable[ k ] ) { - frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), - rateTable[ k ] - rateTable[ k - 1 ] ); - psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] ); - break; - } - } - } - - return ret; -} diff --git a/thirdparty/opus/silk/control_audio_bandwidth.c b/thirdparty/opus/silk/control_audio_bandwidth.c deleted file mode 100644 index 4f9bc5cbda..0000000000 --- a/thirdparty/opus/silk/control_audio_bandwidth.c +++ /dev/null @@ -1,126 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "tuning_parameters.h" - -/* Control internal sampling rate */ -opus_int silk_control_audio_bandwidth( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl /* I Control structure */ -) -{ - opus_int fs_kHz; - opus_int32 fs_Hz; - - fs_kHz = psEncC->fs_kHz; - fs_Hz = silk_SMULBB( fs_kHz, 1000 ); - if( fs_Hz == 0 ) { - /* Encoder has just been initialized */ - fs_Hz = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz ); - fs_kHz = silk_DIV32_16( fs_Hz, 1000 ); - } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) { - /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */ - fs_Hz = psEncC->API_fs_Hz; - fs_Hz = silk_min( fs_Hz, psEncC->maxInternal_fs_Hz ); - fs_Hz = silk_max( fs_Hz, psEncC->minInternal_fs_Hz ); - fs_kHz = silk_DIV32_16( fs_Hz, 1000 ); - } else { - /* State machine for the internal sampling rate switching */ - if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) { - /* Stop transition phase */ - psEncC->sLP.mode = 0; - } - if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) { - /* Check if we should switch down */ - if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz ) - { - /* Switch down */ - if( psEncC->sLP.mode == 0 ) { - /* New transition */ - psEncC->sLP.transition_frame_no = TRANSITION_FRAMES; - - /* Reset transition filter state */ - silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) ); - } - if( encControl->opusCanSwitch ) { - /* Stop transition phase */ - psEncC->sLP.mode = 0; - - /* Switch to a lower sample frequency */ - fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8; - } else { - if( psEncC->sLP.transition_frame_no <= 0 ) { - encControl->switchReady = 1; - /* Make room for redundancy */ - encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 ); - } else { - /* Direction: down (at double speed) */ - psEncC->sLP.mode = -2; - } - } - } - else - /* Check if we should switch up */ - if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz ) - { - /* Switch up */ - if( encControl->opusCanSwitch ) { - /* Switch to a higher sample frequency */ - fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16; - - /* New transition */ - psEncC->sLP.transition_frame_no = 0; - - /* Reset transition filter state */ - silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) ); - - /* Direction: up */ - psEncC->sLP.mode = 1; - } else { - if( psEncC->sLP.mode == 0 ) { - encControl->switchReady = 1; - /* Make room for redundancy */ - encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 ); - } else { - /* Direction: up */ - psEncC->sLP.mode = 1; - } - } - } else { - if (psEncC->sLP.mode<0) - psEncC->sLP.mode = 1; - } - } - } - - return fs_kHz; -} diff --git a/thirdparty/opus/silk/control_codec.c b/thirdparty/opus/silk/control_codec.c deleted file mode 100644 index 044eea3f2a..0000000000 --- a/thirdparty/opus/silk/control_codec.c +++ /dev/null @@ -1,428 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#ifdef FIXED_POINT -#include "main_FIX.h" -#define silk_encoder_state_Fxx silk_encoder_state_FIX -#else -#include "main_FLP.h" -#define silk_encoder_state_Fxx silk_encoder_state_FLP -#endif -#include "stack_alloc.h" -#include "tuning_parameters.h" -#include "pitch_est_defines.h" - -static opus_int silk_setup_resamplers( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz /* I */ -); - -static opus_int silk_setup_fs( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz, /* I */ - opus_int PacketSize_ms /* I */ -); - -static opus_int silk_setup_complexity( - silk_encoder_state *psEncC, /* I/O */ - opus_int Complexity /* I */ -); - -static OPUS_INLINE opus_int silk_setup_LBRR( - silk_encoder_state *psEncC, /* I/O */ - const opus_int32 TargetRate_bps /* I */ -); - - -/* Control encoder */ -opus_int silk_control_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl, /* I Control structure */ - const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ - const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ - const opus_int channelNb, /* I Channel number */ - const opus_int force_fs_kHz -) -{ - opus_int fs_kHz, ret = 0; - - psEnc->sCmn.useDTX = encControl->useDTX; - psEnc->sCmn.useCBR = encControl->useCBR; - psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate; - psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate; - psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate; - psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate; - psEnc->sCmn.useInBandFEC = encControl->useInBandFEC; - psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI; - psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal; - psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch; - psEnc->sCmn.channelNb = channelNb; - - if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) { - if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) { - /* Change in API sampling rate in the middle of encoding a packet */ - ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz ); - } - return ret; - } - - /* Beyond this point we know that there are no previously coded frames in the payload buffer */ - - /********************************************/ - /* Determine internal sampling rate */ - /********************************************/ - fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl ); - if( force_fs_kHz ) { - fs_kHz = force_fs_kHz; - } - /********************************************/ - /* Prepare resampler and buffered data */ - /********************************************/ - ret += silk_setup_resamplers( psEnc, fs_kHz ); - - /********************************************/ - /* Set internal sampling frequency */ - /********************************************/ - ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms ); - - /********************************************/ - /* Set encoding complexity */ - /********************************************/ - ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity ); - - /********************************************/ - /* Set packet loss rate measured by farend */ - /********************************************/ - psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage; - - /********************************************/ - /* Set LBRR usage */ - /********************************************/ - ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps ); - - psEnc->sCmn.controlled_since_last_payload = 1; - - return ret; -} - -static opus_int silk_setup_resamplers( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz /* I */ -) -{ - opus_int ret = SILK_NO_ERROR; - SAVE_STACK; - - if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz ) - { - if( psEnc->sCmn.fs_kHz == 0 ) { - /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ - ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 ); - } else { - VARDECL( opus_int16, x_buf_API_fs_Hz ); - VARDECL( silk_resampler_state_struct, temp_resampler_state ); -#ifdef FIXED_POINT - opus_int16 *x_bufFIX = psEnc->x_buf; -#else - VARDECL( opus_int16, x_bufFIX ); - opus_int32 new_buf_samples; -#endif - opus_int32 api_buf_samples; - opus_int32 old_buf_samples; - opus_int32 buf_length_ms; - - buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS; - old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz; - -#ifndef FIXED_POINT - new_buf_samples = buf_length_ms * fs_kHz; - ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ), - opus_int16 ); - silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples ); -#endif - - /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */ - ALLOC( temp_resampler_state, 1, silk_resampler_state_struct ); - ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 ); - - /* Calculate number of samples to temporarily upsample */ - api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 ); - - /* Temporary resampling of x_buf data to API_fs_Hz */ - ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 ); - ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples ); - - /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ - ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 ); - - /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */ - ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples ); - -#ifndef FIXED_POINT - silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples); -#endif - } - } - - psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz; - - RESTORE_STACK; - return ret; -} - -static opus_int silk_setup_fs( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz, /* I */ - opus_int PacketSize_ms /* I */ -) -{ - opus_int ret = SILK_NO_ERROR; - - /* Set packet size */ - if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) { - if( ( PacketSize_ms != 10 ) && - ( PacketSize_ms != 20 ) && - ( PacketSize_ms != 40 ) && - ( PacketSize_ms != 60 ) ) { - ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; - } - if( PacketSize_ms <= 10 ) { - psEnc->sCmn.nFramesPerPacket = 1; - psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1; - psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz ); - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); - if( psEnc->sCmn.fs_kHz == 8 ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; - } - } else { - psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS ); - psEnc->sCmn.nb_subfr = MAX_NB_SUBFR; - psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz ); - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); - if( psEnc->sCmn.fs_kHz == 8 ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; - } - } - psEnc->sCmn.PacketSize_ms = PacketSize_ms; - psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ - } - - /* Set internal sampling frequency */ - silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); - silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); - if( psEnc->sCmn.fs_kHz != fs_kHz ) { - /* reset part of the state */ - silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) ); - silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) ); - silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) ); - silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); - silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) ); - psEnc->sCmn.inputBufIx = 0; - psEnc->sCmn.nFramesEncoded = 0; - psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ - - /* Initialize non-zero parameters */ - psEnc->sCmn.prevLag = 100; - psEnc->sCmn.first_frame_after_reset = 1; - psEnc->sPrefilt.lagPrev = 100; - psEnc->sShape.LastGainIndex = 10; - psEnc->sCmn.sNSQ.lagPrev = 100; - psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536; - psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; - - psEnc->sCmn.fs_kHz = fs_kHz; - if( psEnc->sCmn.fs_kHz == 8 ) { - if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; - } - } else { - if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; - } - } - if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) { - psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER; - psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB; - } else { - psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER; - psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB; - } - psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz; - psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr ); - psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); - psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz ); - psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz ); - if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); - } else { - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); - } - if( psEnc->sCmn.fs_kHz == 16 ) { - psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 ); - psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; - } else if( psEnc->sCmn.fs_kHz == 12 ) { - psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 ); - psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; - } else { - psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 ); - psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; - } - } - - /* Check that settings are valid */ - silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length ); - - return ret; -} - -static opus_int silk_setup_complexity( - silk_encoder_state *psEncC, /* I/O */ - opus_int Complexity /* I */ -) -{ - opus_int ret = 0; - - /* Set encoding complexity */ - silk_assert( Complexity >= 0 && Complexity <= 10 ); - if( Complexity < 2 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); - psEncC->pitchEstimationLPCOrder = 6; - psEncC->shapingLPCOrder = 8; - psEncC->la_shape = 3 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 1; - psEncC->useInterpolatedNLSFs = 0; - psEncC->LTPQuantLowComplexity = 1; - psEncC->NLSF_MSVQ_Survivors = 2; - psEncC->warping_Q16 = 0; - } else if( Complexity < 4 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); - psEncC->pitchEstimationLPCOrder = 8; - psEncC->shapingLPCOrder = 10; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 1; - psEncC->useInterpolatedNLSFs = 0; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 4; - psEncC->warping_Q16 = 0; - } else if( Complexity < 6 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 ); - psEncC->pitchEstimationLPCOrder = 10; - psEncC->shapingLPCOrder = 12; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 2; - psEncC->useInterpolatedNLSFs = 1; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 8; - psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); - } else if( Complexity < 8 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 ); - psEncC->pitchEstimationLPCOrder = 12; - psEncC->shapingLPCOrder = 14; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 3; - psEncC->useInterpolatedNLSFs = 1; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 16; - psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); - } else { - psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 ); - psEncC->pitchEstimationLPCOrder = 16; - psEncC->shapingLPCOrder = 16; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES; - psEncC->useInterpolatedNLSFs = 1; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 32; - psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); - } - - /* Do not allow higher pitch estimation LPC order than predict LPC order */ - psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder ); - psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape; - psEncC->Complexity = Complexity; - - silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER ); - silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER ); - silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES ); - silk_assert( psEncC->warping_Q16 <= 32767 ); - silk_assert( psEncC->la_shape <= LA_SHAPE_MAX ); - silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); - silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS ); - - return ret; -} - -static OPUS_INLINE opus_int silk_setup_LBRR( - silk_encoder_state *psEncC, /* I/O */ - const opus_int32 TargetRate_bps /* I */ -) -{ - opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR; - opus_int32 LBRR_rate_thres_bps; - - LBRR_in_previous_packet = psEncC->LBRR_enabled; - psEncC->LBRR_enabled = 0; - if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) { - if( psEncC->fs_kHz == 8 ) { - LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS; - } else if( psEncC->fs_kHz == 12 ) { - LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS; - } else { - LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS; - } - LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); - - if( TargetRate_bps > LBRR_rate_thres_bps ) { - /* Set gain increase for coding LBRR excitation */ - if( LBRR_in_previous_packet == 0 ) { - /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ - psEncC->LBRR_GainIncreases = 7; - } else { - psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); - } - psEncC->LBRR_enabled = 1; - } - } - - return ret; -} diff --git a/thirdparty/opus/silk/debug.c b/thirdparty/opus/silk/debug.c deleted file mode 100644 index 9253faf71b..0000000000 --- a/thirdparty/opus/silk/debug.c +++ /dev/null @@ -1,170 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "debug.h" -#include "SigProc_FIX.h" - -#if SILK_TIC_TOC - -#ifdef _WIN32 - -#if (defined(_WIN32) || defined(_WINCE)) -#include /* timer */ -#else /* Linux or Mac*/ -#include -#endif - -unsigned long silk_GetHighResolutionTime(void) /* O time in usec*/ -{ - /* Returns a time counter in microsec */ - /* the resolution is platform dependent */ - /* but is typically 1.62 us resolution */ - LARGE_INTEGER lpPerformanceCount; - LARGE_INTEGER lpFrequency; - QueryPerformanceCounter(&lpPerformanceCount); - QueryPerformanceFrequency(&lpFrequency); - return (unsigned long)((1000000*(lpPerformanceCount.QuadPart)) / lpFrequency.QuadPart); -} -#else /* Linux or Mac*/ -unsigned long GetHighResolutionTime(void) /* O time in usec*/ -{ - struct timeval tv; - gettimeofday(&tv, 0); - return((tv.tv_sec*1000000)+(tv.tv_usec)); -} -#endif - -int silk_Timer_nTimers = 0; -int silk_Timer_depth_ctr = 0; -char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; -#ifdef WIN32 -LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; -#else -unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; -#endif -unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX]; - -#ifdef WIN32 -void silk_TimerSave(char *file_name) -{ - if( silk_Timer_nTimers > 0 ) - { - int k; - FILE *fp; - LARGE_INTEGER lpFrequency; - LARGE_INTEGER lpPerformanceCount1, lpPerformanceCount2; - int del = 0x7FFFFFFF; - double avg, sum_avg; - /* estimate overhead of calling performance counters */ - for( k = 0; k < 1000; k++ ) { - QueryPerformanceCounter(&lpPerformanceCount1); - QueryPerformanceCounter(&lpPerformanceCount2); - lpPerformanceCount2.QuadPart -= lpPerformanceCount1.QuadPart; - if( (int)lpPerformanceCount2.LowPart < del ) - del = lpPerformanceCount2.LowPart; - } - QueryPerformanceFrequency(&lpFrequency); - /* print results to file */ - sum_avg = 0.0f; - for( k = 0; k < silk_Timer_nTimers; k++ ) { - if (silk_Timer_depth[k] == 0) { - sum_avg += (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart * silk_Timer_cnt[k]; - } - } - fp = fopen(file_name, "w"); - fprintf(fp, " min avg %% max count\n"); - for( k = 0; k < silk_Timer_nTimers; k++ ) { - if (silk_Timer_depth[k] == 0) { - fprintf(fp, "%-28s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 1) { - fprintf(fp, " %-27s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 2) { - fprintf(fp, " %-26s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 3) { - fprintf(fp, " %-25s", silk_Timer_tags[k]); - } else { - fprintf(fp, " %-24s", silk_Timer_tags[k]); - } - avg = (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart; - fprintf(fp, "%8.2f", (1e6 * (silk_max_64(silk_Timer_min[k] - del, 0))) / lpFrequency.QuadPart); - fprintf(fp, "%12.2f %6.2f", avg, 100.0 * avg / sum_avg * silk_Timer_cnt[k]); - fprintf(fp, "%12.2f", (1e6 * (silk_max_64(silk_Timer_max[k] - del, 0))) / lpFrequency.QuadPart); - fprintf(fp, "%10d\n", silk_Timer_cnt[k]); - } - fprintf(fp, " microseconds\n"); - fclose(fp); - } -} -#else -void silk_TimerSave(char *file_name) -{ - if( silk_Timer_nTimers > 0 ) - { - int k; - FILE *fp; - /* print results to file */ - fp = fopen(file_name, "w"); - fprintf(fp, " min avg max count\n"); - for( k = 0; k < silk_Timer_nTimers; k++ ) - { - if (silk_Timer_depth[k] == 0) { - fprintf(fp, "%-28s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 1) { - fprintf(fp, " %-27s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 2) { - fprintf(fp, " %-26s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 3) { - fprintf(fp, " %-25s", silk_Timer_tags[k]); - } else { - fprintf(fp, " %-24s", silk_Timer_tags[k]); - } - fprintf(fp, "%d ", silk_Timer_min[k]); - fprintf(fp, "%f ", (double)silk_Timer_sum[k] / (double)silk_Timer_cnt[k]); - fprintf(fp, "%d ", silk_Timer_max[k]); - fprintf(fp, "%10d\n", silk_Timer_cnt[k]); - } - fprintf(fp, " microseconds\n"); - fclose(fp); - } -} -#endif - -#endif /* SILK_TIC_TOC */ - -#if SILK_DEBUG -FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; -int silk_debug_store_count = 0; -#endif /* SILK_DEBUG */ - diff --git a/thirdparty/opus/silk/debug.h b/thirdparty/opus/silk/debug.h deleted file mode 100644 index efb6d3e99e..0000000000 --- a/thirdparty/opus/silk/debug.h +++ /dev/null @@ -1,279 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_DEBUG_H -#define SILK_DEBUG_H - -#include "typedef.h" -#include /* file writing */ -#include /* strcpy, strcmp */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -unsigned long GetHighResolutionTime(void); /* O time in usec*/ - -/* make SILK_DEBUG dependent on compiler's _DEBUG */ -#if defined _WIN32 - #ifdef _DEBUG - #define SILK_DEBUG 1 - #else - #define SILK_DEBUG 0 - #endif - - /* overrule the above */ - #if 0 - /* #define NO_ASSERTS*/ - #undef SILK_DEBUG - #define SILK_DEBUG 1 - #endif -#else - #define SILK_DEBUG 0 -#endif - -/* Flag for using timers */ -#define SILK_TIC_TOC 0 - - -#if SILK_TIC_TOC - -#if (defined(_WIN32) || defined(_WINCE)) -#include /* timer */ -#else /* Linux or Mac*/ -#include -#endif - -/*********************************/ -/* timer functions for profiling */ -/*********************************/ -/* example: */ -/* */ -/* TIC(LPC) */ -/* do_LPC(in_vec, order, acoef); // do LPC analysis */ -/* TOC(LPC) */ -/* */ -/* and call the following just before exiting (from main) */ -/* */ -/* silk_TimerSave("silk_TimingData.txt"); */ -/* */ -/* results are now in silk_TimingData.txt */ - -void silk_TimerSave(char *file_name); - -/* max number of timers (in different locations) */ -#define silk_NUM_TIMERS_MAX 50 -/* max length of name tags in TIC(..), TOC(..) */ -#define silk_NUM_TIMERS_MAX_TAG_LEN 30 - -extern int silk_Timer_nTimers; -extern int silk_Timer_depth_ctr; -extern char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; -#ifdef _WIN32 -extern LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; -#else -extern unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; -#endif -extern unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX]; - -/* WARNING: TIC()/TOC can measure only up to 0.1 seconds at a time */ -#ifdef _WIN32 -#define TIC(TAG_NAME) { \ - static int init = 0; \ - static int ID = -1; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - if (ID == -1) { \ - ID = silk_Timer_nTimers; \ - silk_Timer_nTimers++; \ - silk_Timer_depth[ID] = silk_Timer_depth_ctr; \ - strcpy(silk_Timer_tags[ID], #TAG_NAME); \ - silk_Timer_cnt[ID] = 0; \ - silk_Timer_sum[ID] = 0; \ - silk_Timer_min[ID] = 0xFFFFFFFF; \ - silk_Timer_max[ID] = 0; \ - } \ - } \ - silk_Timer_depth_ctr++; \ - QueryPerformanceCounter(&silk_Timer_start[ID]); \ -} -#else -#define TIC(TAG_NAME) { \ - static int init = 0; \ - static int ID = -1; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - if (ID == -1) { \ - ID = silk_Timer_nTimers; \ - silk_Timer_nTimers++; \ - silk_Timer_depth[ID] = silk_Timer_depth_ctr; \ - strcpy(silk_Timer_tags[ID], #TAG_NAME); \ - silk_Timer_cnt[ID] = 0; \ - silk_Timer_sum[ID] = 0; \ - silk_Timer_min[ID] = 0xFFFFFFFF; \ - silk_Timer_max[ID] = 0; \ - } \ - } \ - silk_Timer_depth_ctr++; \ - silk_Timer_start[ID] = GetHighResolutionTime(); \ -} -#endif - -#ifdef _WIN32 -#define TOC(TAG_NAME) { \ - LARGE_INTEGER lpPerformanceCount; \ - static int init = 0; \ - static int ID = 0; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - } \ - QueryPerformanceCounter(&lpPerformanceCount); \ - lpPerformanceCount.QuadPart -= silk_Timer_start[ID].QuadPart; \ - if((lpPerformanceCount.QuadPart < 100000000) && \ - (lpPerformanceCount.QuadPart >= 0)) { \ - silk_Timer_cnt[ID]++; \ - silk_Timer_sum[ID] += lpPerformanceCount.QuadPart; \ - if( lpPerformanceCount.QuadPart > silk_Timer_max[ID] ) \ - silk_Timer_max[ID] = lpPerformanceCount.QuadPart; \ - if( lpPerformanceCount.QuadPart < silk_Timer_min[ID] ) \ - silk_Timer_min[ID] = lpPerformanceCount.QuadPart; \ - } \ - silk_Timer_depth_ctr--; \ -} -#else -#define TOC(TAG_NAME) { \ - unsigned long endTime; \ - static int init = 0; \ - static int ID = 0; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - } \ - endTime = GetHighResolutionTime(); \ - endTime -= silk_Timer_start[ID]; \ - if((endTime < 100000000) && \ - (endTime >= 0)) { \ - silk_Timer_cnt[ID]++; \ - silk_Timer_sum[ID] += endTime; \ - if( endTime > silk_Timer_max[ID] ) \ - silk_Timer_max[ID] = endTime; \ - if( endTime < silk_Timer_min[ID] ) \ - silk_Timer_min[ID] = endTime; \ - } \ - silk_Timer_depth_ctr--; \ -} -#endif - -#else /* SILK_TIC_TOC */ - -/* define macros as empty strings */ -#define TIC(TAG_NAME) -#define TOC(TAG_NAME) -#define silk_TimerSave(FILE_NAME) - -#endif /* SILK_TIC_TOC */ - - -#if SILK_DEBUG -/************************************/ -/* write data to file for debugging */ -/************************************/ -/* Example: DEBUG_STORE_DATA(testfile.pcm, &RIN[0], 160*sizeof(opus_int16)); */ - -#define silk_NUM_STORES_MAX 100 -extern FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; -extern int silk_debug_store_count; - -/* Faster way of storing the data */ -#define DEBUG_STORE_DATA( FILE_NAME, DATA_PTR, N_BYTES ) { \ - static opus_int init = 0, cnt = 0; \ - static FILE **fp; \ - if (init == 0) { \ - init = 1; \ - cnt = silk_debug_store_count++; \ - silk_debug_store_fp[ cnt ] = fopen(#FILE_NAME, "wb"); \ - } \ - fwrite((DATA_PTR), (N_BYTES), 1, silk_debug_store_fp[ cnt ]); \ -} - -/* Call this at the end of main() */ -#define SILK_DEBUG_STORE_CLOSE_FILES { \ - opus_int i; \ - for( i = 0; i < silk_debug_store_count; i++ ) { \ - fclose( silk_debug_store_fp[ i ] ); \ - } \ -} - -#else /* SILK_DEBUG */ - -/* define macros as empty strings */ -#define DEBUG_STORE_DATA(FILE_NAME, DATA_PTR, N_BYTES) -#define SILK_DEBUG_STORE_CLOSE_FILES - -#endif /* SILK_DEBUG */ - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_DEBUG_H */ diff --git a/thirdparty/opus/silk/dec_API.c b/thirdparty/opus/silk/dec_API.c deleted file mode 100644 index b7d8ed48d8..0000000000 --- a/thirdparty/opus/silk/dec_API.c +++ /dev/null @@ -1,419 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#include "API.h" -#include "main.h" -#include "stack_alloc.h" -#include "os_support.h" - -/************************/ -/* Decoder Super Struct */ -/************************/ -typedef struct { - silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; - stereo_dec_state sStereo; - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int prev_decode_only_middle; -} silk_decoder; - -/*********************/ -/* Decoder functions */ -/*********************/ - -opus_int silk_Get_Decoder_Size( /* O Returns error code */ - opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ -) -{ - opus_int ret = SILK_NO_ERROR; - - *decSizeBytes = sizeof( silk_decoder ); - - return ret; -} - -/* Reset decoder state */ -opus_int silk_InitDecoder( /* O Returns error code */ - void *decState /* I/O State */ -) -{ - opus_int n, ret = SILK_NO_ERROR; - silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; - - for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { - ret = silk_init_decoder( &channel_state[ n ] ); - } - silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); - /* Not strictly needed, but it's cleaner that way */ - ((silk_decoder *)decState)->prev_decode_only_middle = 0; - - return ret; -} - -/* Decode a frame */ -opus_int silk_Decode( /* O Returns error code */ - void* decState, /* I/O State */ - silk_DecControlStruct* decControl, /* I/O Control Structure */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 *samplesOut, /* O Decoded output speech vector */ - opus_int32 *nSamplesOut, /* O Number of samples decoded */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; - opus_int32 nSamplesOutDec, LBRR_symbol; - opus_int16 *samplesOut1_tmp[ 2 ]; - VARDECL( opus_int16, samplesOut1_tmp_storage1 ); - VARDECL( opus_int16, samplesOut1_tmp_storage2 ); - VARDECL( opus_int16, samplesOut2_tmp ); - opus_int32 MS_pred_Q13[ 2 ] = { 0 }; - opus_int16 *resample_out_ptr; - silk_decoder *psDec = ( silk_decoder * )decState; - silk_decoder_state *channel_state = psDec->channel_state; - opus_int has_side; - opus_int stereo_to_mono; - int delay_stack_alloc; - SAVE_STACK; - - silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); - - /**********************************/ - /* Test if first frame in payload */ - /**********************************/ - if( newPacketFlag ) { - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ - } - } - - /* If Mono -> Stereo transition in bitstream: init state of second channel */ - if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { - ret += silk_init_decoder( &channel_state[ 1 ] ); - } - - stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && - ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); - - if( channel_state[ 0 ].nFramesDecoded == 0 ) { - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - opus_int fs_kHz_dec; - if( decControl->payloadSize_ms == 0 ) { - /* Assuming packet loss, use 10 ms */ - channel_state[ n ].nFramesPerPacket = 1; - channel_state[ n ].nb_subfr = 2; - } else if( decControl->payloadSize_ms == 10 ) { - channel_state[ n ].nFramesPerPacket = 1; - channel_state[ n ].nb_subfr = 2; - } else if( decControl->payloadSize_ms == 20 ) { - channel_state[ n ].nFramesPerPacket = 1; - channel_state[ n ].nb_subfr = 4; - } else if( decControl->payloadSize_ms == 40 ) { - channel_state[ n ].nFramesPerPacket = 2; - channel_state[ n ].nb_subfr = 4; - } else if( decControl->payloadSize_ms == 60 ) { - channel_state[ n ].nFramesPerPacket = 3; - channel_state[ n ].nb_subfr = 4; - } else { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_DEC_INVALID_FRAME_SIZE; - } - fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; - if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_DEC_INVALID_SAMPLING_FREQUENCY; - } - ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); - } - } - - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { - silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); - silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); - silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); - } - psDec->nChannelsAPI = decControl->nChannelsAPI; - psDec->nChannelsInternal = decControl->nChannelsInternal; - - if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { - ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; - RESTORE_STACK; - return( ret ); - } - - if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { - /* First decoder call for this payload */ - /* Decode VAD flags and LBRR flag */ - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { - channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); - } - channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); - } - /* Decode LBRR flags */ - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); - if( channel_state[ n ].LBRR_flag ) { - if( channel_state[ n ].nFramesPerPacket == 1 ) { - channel_state[ n ].LBRR_flags[ 0 ] = 1; - } else { - LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; - for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { - channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; - } - } - } - } - - if( lostFlag == FLAG_DECODE_NORMAL ) { - /* Regular decoding: skip all LBRR data */ - for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - if( channel_state[ n ].LBRR_flags[ i ] ) { - opus_int16 pulses[ MAX_FRAME_LENGTH ]; - opus_int condCoding; - - if( decControl->nChannelsInternal == 2 && n == 0 ) { - silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); - if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { - silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); - } - } - /* Use conditional coding if previous frame available */ - if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { - condCoding = CODE_CONDITIONALLY; - } else { - condCoding = CODE_INDEPENDENTLY; - } - silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); - silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, - channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); - } - } - } - } - } - - /* Get MS predictor index */ - if( decControl->nChannelsInternal == 2 ) { - if( lostFlag == FLAG_DECODE_NORMAL || - ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) - { - silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); - /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ - if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || - ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) - { - silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); - } else { - decode_only_middle = 0; - } - } else { - for( n = 0; n < 2; n++ ) { - MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; - } - } - } - - /* Reset side channel decoder prediction memory for first frame with side coding */ - if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { - silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); - silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); - psDec->channel_state[ 1 ].lagPrev = 100; - psDec->channel_state[ 1 ].LastGainIndex = 10; - psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; - psDec->channel_state[ 1 ].first_frame_after_reset = 1; - } - - /* Check if the temp buffer fits into the output PCM buffer. If it fits, - we can delay allocating the temp buffer until after the SILK peak stack - usage. We need to use a < and not a <= because of the two extra samples. */ - delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal - < decControl->API_sampleRate*decControl->nChannelsAPI; - ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE - : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), - opus_int16 ); - if ( delay_stack_alloc ) - { - samplesOut1_tmp[ 0 ] = samplesOut; - samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; - } else { - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; - } - - if( lostFlag == FLAG_DECODE_NORMAL ) { - has_side = !decode_only_middle; - } else { - has_side = !psDec->prev_decode_only_middle - || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); - } - /* Call decoder for one frame */ - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - if( n == 0 || has_side ) { - opus_int FrameIndex; - opus_int condCoding; - - FrameIndex = channel_state[ 0 ].nFramesDecoded - n; - /* Use independent coding if no previous frame available */ - if( FrameIndex <= 0 ) { - condCoding = CODE_INDEPENDENTLY; - } else if( lostFlag == FLAG_DECODE_LBRR ) { - condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; - } else if( n > 0 && psDec->prev_decode_only_middle ) { - /* If we skipped a side frame in this packet, we don't - need LTP scaling; the LTP state is well-defined. */ - condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; - } else { - condCoding = CODE_CONDITIONALLY; - } - ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch); - } else { - silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); - } - channel_state[ n ].nFramesDecoded++; - } - - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { - /* Convert Mid/Side to Left/Right */ - silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); - } else { - /* Buffering */ - silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); - } - - /* Number of output samples */ - *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); - - /* Set up pointers to temp buffers */ - ALLOC( samplesOut2_tmp, - decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 ); - if( decControl->nChannelsAPI == 2 ) { - resample_out_ptr = samplesOut2_tmp; - } else { - resample_out_ptr = samplesOut; - } - - ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc - ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) - : ALLOC_NONE, - opus_int16 ); - if ( delay_stack_alloc ) { - OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; - } - for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { - - /* Resample decoded signal to API_sampleRate */ - ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); - - /* Interleave if stereo output and stereo stream */ - if( decControl->nChannelsAPI == 2 ) { - for( i = 0; i < *nSamplesOut; i++ ) { - samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; - } - } - } - - /* Create two channel output from mono stream */ - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { - if ( stereo_to_mono ){ - /* Resample right channel for newly collapsed stereo just in case - we weren't doing collapsing when switching to mono */ - ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); - - for( i = 0; i < *nSamplesOut; i++ ) { - samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; - } - } else { - for( i = 0; i < *nSamplesOut; i++ ) { - samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; - } - } - } - - /* Export pitch lag, measured at 48 kHz sampling rate */ - if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { - int mult_tab[ 3 ] = { 6, 4, 3 }; - decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; - } else { - decControl->prevPitchLag = 0; - } - - if( lostFlag == FLAG_PACKET_LOST ) { - /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" - if we lose packets when the energy is going down */ - for ( i = 0; i < psDec->nChannelsInternal; i++ ) - psDec->channel_state[ i ].LastGainIndex = 10; - } else { - psDec->prev_decode_only_middle = decode_only_middle; - } - RESTORE_STACK; - return ret; -} - -#if 0 -/* Getting table of contents for a packet */ -opus_int silk_get_TOC( - const opus_uint8 *payload, /* I Payload data */ - const opus_int nBytesIn, /* I Number of input bytes */ - const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ - silk_TOC_struct *Silk_TOC /* O Type of content */ -) -{ - opus_int i, flags, ret = SILK_NO_ERROR; - - if( nBytesIn < 1 ) { - return -1; - } - if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { - return -1; - } - - silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); - - /* For stereo, extract the flags for the mid channel */ - flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); - - Silk_TOC->inbandFECFlag = flags & 1; - for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { - flags = silk_RSHIFT( flags, 1 ); - Silk_TOC->VADFlags[ i ] = flags & 1; - Silk_TOC->VADFlag |= flags & 1; - } - - return ret; -} -#endif diff --git a/thirdparty/opus/silk/decode_core.c b/thirdparty/opus/silk/decode_core.c deleted file mode 100644 index e569c0e72b..0000000000 --- a/thirdparty/opus/silk/decode_core.c +++ /dev/null @@ -1,239 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/**********************************************************/ -/* Core decoder. Performs inverse NSQ operation LTP + LPC */ -/**********************************************************/ -void silk_decode_core( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I Decoder control */ - opus_int16 xq[], /* O Decoded speech */ - const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; - opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ]; - VARDECL( opus_int16, sLTP ); - VARDECL( opus_int32, sLTP_Q15 ); - opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10; - opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14; - VARDECL( opus_int32, res_Q14 ); - VARDECL( opus_int32, sLPC_Q14 ); - SAVE_STACK; - - silk_assert( psDec->prev_gain_Q16 != 0 ); - - ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); - ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); - ALLOC( res_Q14, psDec->subfr_length, opus_int32 ); - ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); - - offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ]; - - if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) { - NLSF_interpolation_flag = 1; - } else { - NLSF_interpolation_flag = 0; - } - - /* Decode excitation */ - rand_seed = psDec->indices.Seed; - for( i = 0; i < psDec->frame_length; i++ ) { - rand_seed = silk_RAND( rand_seed ); - psDec->exc_Q14[ i ] = silk_LSHIFT( (opus_int32)pulses[ i ], 14 ); - if( psDec->exc_Q14[ i ] > 0 ) { - psDec->exc_Q14[ i ] -= QUANT_LEVEL_ADJUST_Q10 << 4; - } else - if( psDec->exc_Q14[ i ] < 0 ) { - psDec->exc_Q14[ i ] += QUANT_LEVEL_ADJUST_Q10 << 4; - } - psDec->exc_Q14[ i ] += offset_Q10 << 4; - if( rand_seed < 0 ) { - psDec->exc_Q14[ i ] = -psDec->exc_Q14[ i ]; - } - - rand_seed = silk_ADD32_ovflw( rand_seed, pulses[ i ] ); - } - - /* Copy LPC state */ - silk_memcpy( sLPC_Q14, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - - pexc_Q14 = psDec->exc_Q14; - pxq = xq; - sLTP_buf_idx = psDec->ltp_mem_length; - /* Loop over subframes */ - for( k = 0; k < psDec->nb_subfr; k++ ) { - pres_Q14 = res_Q14; - A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ]; - - /* Preload LPC coeficients to array on stack. Gives small performance gain */ - silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); - B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ]; - signalType = psDec->indices.signalType; - - Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 ); - inv_gain_Q31 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 47 ); - - /* Calculate gain adjustment factor */ - if( psDecCtrl->Gains_Q16[ k ] != psDec->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[ k ], 16 ); - - /* Scale short term state */ - for( i = 0; i < MAX_LPC_ORDER; i++ ) { - sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] ); - } - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Save inv_gain */ - silk_assert( inv_gain_Q31 != 0 ); - psDec->prev_gain_Q16 = psDecCtrl->Gains_Q16[ k ]; - - /* Avoid abrupt transition from voiced PLC to unvoiced normal decoding */ - if( psDec->lossCnt && psDec->prevSignalType == TYPE_VOICED && - psDec->indices.signalType != TYPE_VOICED && k < MAX_NB_SUBFR/2 ) { - - silk_memset( B_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); - B_Q14[ LTP_ORDER/2 ] = SILK_FIX_CONST( 0.25, 14 ); - - signalType = TYPE_VOICED; - psDecCtrl->pitchL[ k ] = psDec->lagPrev; - } - - if( signalType == TYPE_VOICED ) { - /* Voiced */ - lag = psDecCtrl->pitchL[ k ]; - - /* Re-whitening */ - if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) { - /* Rewhiten with new A coefs */ - start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - if( k == 2 ) { - silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) ); - } - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ], - A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order, arch ); - - /* After rewhitening the LTP state is unscaled */ - if( k == 0 ) { - /* Do LTP downscaling to reduce inter-packet dependency */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, psDecCtrl->LTP_scale_Q14 ), 2 ); - } - for( i = 0; i < lag + LTP_ORDER/2; i++ ) { - sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q31, sLTP[ psDec->ltp_mem_length - i - 1 ] ); - } - } else { - /* Update LTP state when Gain changes */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - for( i = 0; i < lag + LTP_ORDER/2; i++ ) { - sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ sLTP_buf_idx - i - 1 ] ); - } - } - } - } - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Set up pointer */ - pred_lag_ptr = &sLTP_Q15[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - for( i = 0; i < psDec->subfr_length; i++ ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q13 = 2; - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); - pred_lag_ptr++; - - /* Generate LPC excitation */ - pres_Q14[ i ] = silk_ADD_LSHIFT32( pexc_Q14[ i ], LTP_pred_Q13, 1 ); - - /* Update states */ - sLTP_Q15[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q14[ i ], 1 ); - sLTP_buf_idx++; - } - } else { - pres_Q14 = pexc_Q14; - } - - for( i = 0; i < psDec->subfr_length; i++ ) { - /* Short-term prediction */ - silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12_tmp[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12_tmp[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12_tmp[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12_tmp[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12_tmp[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12_tmp[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12_tmp[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12_tmp[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12_tmp[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] ); - if( psDec->LPC_order == 16 ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12_tmp[ 10 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] ); - } - - /* Add prediction to LPC excitation */ - sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) ); - - /* Scale with gain */ - pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) ); - } - - /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */ - - /* Update LPC filter state */ - silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); - pexc_Q14 += psDec->subfr_length; - pxq += psDec->subfr_length; - } - - /* Save LPC state */ - silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/decode_frame.c b/thirdparty/opus/silk/decode_frame.c deleted file mode 100644 index a605d95ac6..0000000000 --- a/thirdparty/opus/silk/decode_frame.c +++ /dev/null @@ -1,129 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "PLC.h" - -/****************/ -/* Decode frame */ -/****************/ -opus_int silk_decode_frame( - silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pOut[], /* O Pointer to output speech frame */ - opus_int32 *pN, /* O Pointer to size of output frame */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int condCoding, /* I The type of conditional coding to use */ - int arch /* I Run-time architecture */ -) -{ - VARDECL( silk_decoder_control, psDecCtrl ); - opus_int L, mv_len, ret = 0; - SAVE_STACK; - - L = psDec->frame_length; - ALLOC( psDecCtrl, 1, silk_decoder_control ); - psDecCtrl->LTP_scale_Q14 = 0; - - /* Safety checks */ - silk_assert( L > 0 && L <= MAX_FRAME_LENGTH ); - - if( lostFlag == FLAG_DECODE_NORMAL || - ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) - { - VARDECL( opus_int16, pulses ); - ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & - ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 ); - /*********************************************/ - /* Decode quantization indices of side info */ - /*********************************************/ - silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag, condCoding ); - - /*********************************************/ - /* Decode quantization indices of excitation */ - /*********************************************/ - silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType, - psDec->indices.quantOffsetType, psDec->frame_length ); - - /********************************************/ - /* Decode parameters and pulse signal */ - /********************************************/ - silk_decode_parameters( psDec, psDecCtrl, condCoding ); - - /********************************************************/ - /* Run inverse NSQ */ - /********************************************************/ - silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch ); - - /********************************************************/ - /* Update PLC state */ - /********************************************************/ - silk_PLC( psDec, psDecCtrl, pOut, 0, arch ); - - psDec->lossCnt = 0; - psDec->prevSignalType = psDec->indices.signalType; - silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 ); - - /* A frame has been decoded without errors */ - psDec->first_frame_after_reset = 0; - } else { - /* Handle packet loss by extrapolation */ - silk_PLC( psDec, psDecCtrl, pOut, 1, arch ); - } - - /*************************/ - /* Update output buffer. */ - /*************************/ - silk_assert( psDec->ltp_mem_length >= psDec->frame_length ); - mv_len = psDec->ltp_mem_length - psDec->frame_length; - silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); - silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); - - /************************************************/ - /* Comfort noise generation / estimation */ - /************************************************/ - silk_CNG( psDec, psDecCtrl, pOut, L ); - - /****************************************************************/ - /* Ensure smooth connection of extrapolated and good frames */ - /****************************************************************/ - silk_PLC_glue_frames( psDec, pOut, L ); - - /* Update some decoder state variables */ - psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; - - /* Set output frame length */ - *pN = L; - - RESTORE_STACK; - return ret; -} diff --git a/thirdparty/opus/silk/decode_indices.c b/thirdparty/opus/silk/decode_indices.c deleted file mode 100644 index 7afe5c26c1..0000000000 --- a/thirdparty/opus/silk/decode_indices.c +++ /dev/null @@ -1,151 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Decode side-information parameters from payload */ -void silk_decode_indices( - silk_decoder_state *psDec, /* I/O State */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i, k, Ix; - opus_int decode_absolute_lagIndex, delta_lagIndex; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - - /*******************************************/ - /* Decode signal type and quantizer offset */ - /*******************************************/ - if( decode_LBRR || psDec->VAD_flags[ FrameIndex ] ) { - Ix = ec_dec_icdf( psRangeDec, silk_type_offset_VAD_iCDF, 8 ) + 2; - } else { - Ix = ec_dec_icdf( psRangeDec, silk_type_offset_no_VAD_iCDF, 8 ); - } - psDec->indices.signalType = (opus_int8)silk_RSHIFT( Ix, 1 ); - psDec->indices.quantOffsetType = (opus_int8)( Ix & 1 ); - - /****************/ - /* Decode gains */ - /****************/ - /* First subframe */ - if( condCoding == CODE_CONDITIONALLY ) { - /* Conditional coding */ - psDec->indices.GainsIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 ); - } else { - /* Independent coding, in two stages: MSB bits followed by 3 LSBs */ - psDec->indices.GainsIndices[ 0 ] = (opus_int8)silk_LSHIFT( ec_dec_icdf( psRangeDec, silk_gain_iCDF[ psDec->indices.signalType ], 8 ), 3 ); - psDec->indices.GainsIndices[ 0 ] += (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform8_iCDF, 8 ); - } - - /* Remaining subframes */ - for( i = 1; i < psDec->nb_subfr; i++ ) { - psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 ); - } - - /**********************/ - /* Decode LSF Indices */ - /**********************/ - psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 ); - silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] ); - silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order ); - for( i = 0; i < psDec->psNLSF_CB->order; i++ ) { - Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - if( Ix == 0 ) { - Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 ); - } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) { - Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 ); - } - psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE ); - } - - /* Decode LSF interpolation factor */ - if( psDec->nb_subfr == MAX_NB_SUBFR ) { - psDec->indices.NLSFInterpCoef_Q2 = (opus_int8)ec_dec_icdf( psRangeDec, silk_NLSF_interpolation_factor_iCDF, 8 ); - } else { - psDec->indices.NLSFInterpCoef_Q2 = 4; - } - - if( psDec->indices.signalType == TYPE_VOICED ) - { - /*********************/ - /* Decode pitch lags */ - /*********************/ - /* Get lag index */ - decode_absolute_lagIndex = 1; - if( condCoding == CODE_CONDITIONALLY && psDec->ec_prevSignalType == TYPE_VOICED ) { - /* Decode Delta index */ - delta_lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_delta_iCDF, 8 ); - if( delta_lagIndex > 0 ) { - delta_lagIndex = delta_lagIndex - 9; - psDec->indices.lagIndex = (opus_int16)( psDec->ec_prevLagIndex + delta_lagIndex ); - decode_absolute_lagIndex = 0; - } - } - if( decode_absolute_lagIndex ) { - /* Absolute decoding */ - psDec->indices.lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_lag_iCDF, 8 ) * silk_RSHIFT( psDec->fs_kHz, 1 ); - psDec->indices.lagIndex += (opus_int16)ec_dec_icdf( psRangeDec, psDec->pitch_lag_low_bits_iCDF, 8 ); - } - psDec->ec_prevLagIndex = psDec->indices.lagIndex; - - /* Get countour index */ - psDec->indices.contourIndex = (opus_int8)ec_dec_icdf( psRangeDec, psDec->pitch_contour_iCDF, 8 ); - - /********************/ - /* Decode LTP gains */ - /********************/ - /* Decode PERIndex value */ - psDec->indices.PERIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_per_index_iCDF, 8 ); - - for( k = 0; k < psDec->nb_subfr; k++ ) { - psDec->indices.LTPIndex[ k ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_gain_iCDF_ptrs[ psDec->indices.PERIndex ], 8 ); - } - - /**********************/ - /* Decode LTP scaling */ - /**********************/ - if( condCoding == CODE_INDEPENDENTLY ) { - psDec->indices.LTP_scaleIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTPscale_iCDF, 8 ); - } else { - psDec->indices.LTP_scaleIndex = 0; - } - } - psDec->ec_prevSignalType = psDec->indices.signalType; - - /***************/ - /* Decode seed */ - /***************/ - psDec->indices.Seed = (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform4_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/decode_parameters.c b/thirdparty/opus/silk/decode_parameters.c deleted file mode 100644 index e345b1dcef..0000000000 --- a/thirdparty/opus/silk/decode_parameters.c +++ /dev/null @@ -1,115 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Decode parameters from payload */ -void silk_decode_parameters( - silk_decoder_state *psDec, /* I/O State */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i, k, Ix; - opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], pNLSF0_Q15[ MAX_LPC_ORDER ]; - const opus_int8 *cbk_ptr_Q7; - - /* Dequant Gains */ - silk_gains_dequant( psDecCtrl->Gains_Q16, psDec->indices.GainsIndices, - &psDec->LastGainIndex, condCoding == CODE_CONDITIONALLY, psDec->nb_subfr ); - - /****************/ - /* Decode NLSFs */ - /****************/ - silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB ); - - /* Convert NLSF parameters to AR prediction filter coefficients */ - silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order ); - - /* If just reset, e.g., because internal Fs changed, do not allow interpolation */ - /* improves the case of packet loss in the first frame after a switch */ - if( psDec->first_frame_after_reset == 1 ) { - psDec->indices.NLSFInterpCoef_Q2 = 4; - } - - if( psDec->indices.NLSFInterpCoef_Q2 < 4 ) { - /* Calculation of the interpolated NLSF0 vector from the interpolation factor, */ - /* the previous NLSF1, and the current NLSF1 */ - for( i = 0; i < psDec->LPC_order; i++ ) { - pNLSF0_Q15[ i ] = psDec->prevNLSF_Q15[ i ] + silk_RSHIFT( silk_MUL( psDec->indices.NLSFInterpCoef_Q2, - pNLSF_Q15[ i ] - psDec->prevNLSF_Q15[ i ] ), 2 ); - } - - /* Convert NLSF parameters to AR prediction filter coefficients */ - silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order ); - } else { - /* Copy LPC coefficients for first half from second half */ - silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); - } - - silk_memcpy( psDec->prevNLSF_Q15, pNLSF_Q15, psDec->LPC_order * sizeof( opus_int16 ) ); - - /* After a packet loss do BWE of LPC coefs */ - if( psDec->lossCnt ) { - silk_bwexpander( psDecCtrl->PredCoef_Q12[ 0 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 ); - silk_bwexpander( psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 ); - } - - if( psDec->indices.signalType == TYPE_VOICED ) { - /*********************/ - /* Decode pitch lags */ - /*********************/ - - /* Decode pitch values */ - silk_decode_pitch( psDec->indices.lagIndex, psDec->indices.contourIndex, psDecCtrl->pitchL, psDec->fs_kHz, psDec->nb_subfr ); - - /* Decode Codebook Index */ - cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ psDec->indices.PERIndex ]; /* set pointer to start of codebook */ - - for( k = 0; k < psDec->nb_subfr; k++ ) { - Ix = psDec->indices.LTPIndex[ k ]; - for( i = 0; i < LTP_ORDER; i++ ) { - psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER + i ] = silk_LSHIFT( cbk_ptr_Q7[ Ix * LTP_ORDER + i ], 7 ); - } - } - - /**********************/ - /* Decode LTP scaling */ - /**********************/ - Ix = psDec->indices.LTP_scaleIndex; - psDecCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ Ix ]; - } else { - silk_memset( psDecCtrl->pitchL, 0, psDec->nb_subfr * sizeof( opus_int ) ); - silk_memset( psDecCtrl->LTPCoef_Q14, 0, LTP_ORDER * psDec->nb_subfr * sizeof( opus_int16 ) ); - psDec->indices.PERIndex = 0; - psDecCtrl->LTP_scale_Q14 = 0; - } -} diff --git a/thirdparty/opus/silk/decode_pitch.c b/thirdparty/opus/silk/decode_pitch.c deleted file mode 100644 index fedbc6a525..0000000000 --- a/thirdparty/opus/silk/decode_pitch.c +++ /dev/null @@ -1,77 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/*********************************************************** -* Pitch analyser function -********************************************************** */ -#include "SigProc_FIX.h" -#include "pitch_est_defines.h" - -void silk_decode_pitch( - opus_int16 lagIndex, /* I */ - opus_int8 contourIndex, /* O */ - opus_int pitch_lags[], /* O 4 pitch values */ - const opus_int Fs_kHz, /* I sampling frequency (kHz) */ - const opus_int nb_subfr /* I number of sub frames */ -) -{ - opus_int lag, k, min_lag, max_lag, cbk_size; - const opus_int8 *Lag_CB_ptr; - - if( Fs_kHz == 8 ) { - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE2_EXT; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); - Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE2_10MS; - } - } else { - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - } - - min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz ); - max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ); - lag = min_lag + lagIndex; - - for( k = 0; k < nb_subfr; k++ ) { - pitch_lags[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, contourIndex, cbk_size ); - pitch_lags[ k ] = silk_LIMIT( pitch_lags[ k ], min_lag, max_lag ); - } -} diff --git a/thirdparty/opus/silk/decode_pulses.c b/thirdparty/opus/silk/decode_pulses.c deleted file mode 100644 index d6bbec9225..0000000000 --- a/thirdparty/opus/silk/decode_pulses.c +++ /dev/null @@ -1,115 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/*********************************************/ -/* Decode quantization indices of excitation */ -/*********************************************/ -void silk_decode_pulses( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* O Excitation signal */ - const opus_int signalType, /* I Sigtype */ - const opus_int quantOffsetType, /* I quantOffsetType */ - const opus_int frame_length /* I Frame length */ -) -{ - opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; - opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; - opus_int16 *pulses_ptr; - const opus_uint8 *cdf_ptr; - - /*********************/ - /* Decode rate level */ - /*********************/ - RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); - - /* Calculate number of shell blocks */ - silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); - iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); - if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { - silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ - iter++; - } - - /***************************************************/ - /* Sum-Weighted-Pulses Decoding */ - /***************************************************/ - cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ]; - for( i = 0; i < iter; i++ ) { - nLshifts[ i ] = 0; - sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 ); - - /* LSB indication */ - while( sum_pulses[ i ] == SILK_MAX_PULSES + 1 ) { - nLshifts[ i ]++; - /* When we've already got 10 LSBs, we shift the table to not allow (SILK_MAX_PULSES + 1) */ - sum_pulses[ i ] = ec_dec_icdf( psRangeDec, - silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 ); - } - } - - /***************************************************/ - /* Shell decoding */ - /***************************************************/ - for( i = 0; i < iter; i++ ) { - if( sum_pulses[ i ] > 0 ) { - silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); - } else { - silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) ); - } - } - - /***************************************************/ - /* LSB Decoding */ - /***************************************************/ - for( i = 0; i < iter; i++ ) { - if( nLshifts[ i ] > 0 ) { - nLS = nLshifts[ i ]; - pulses_ptr = &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ]; - for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { - abs_q = pulses_ptr[ k ]; - for( j = 0; j < nLS; j++ ) { - abs_q = silk_LSHIFT( abs_q, 1 ); - abs_q += ec_dec_icdf( psRangeDec, silk_lsb_iCDF, 8 ); - } - pulses_ptr[ k ] = abs_q; - } - /* Mark the number of pulses non-zero for sign decoding. */ - sum_pulses[ i ] |= nLS << 5; - } - } - - /****************************************/ - /* Decode and add signs to pulse signal */ - /****************************************/ - silk_decode_signs( psRangeDec, pulses, frame_length, signalType, quantOffsetType, sum_pulses ); -} diff --git a/thirdparty/opus/silk/decoder_set_fs.c b/thirdparty/opus/silk/decoder_set_fs.c deleted file mode 100644 index eef0fd25e1..0000000000 --- a/thirdparty/opus/silk/decoder_set_fs.c +++ /dev/null @@ -1,108 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Set decoder sampling rate */ -opus_int silk_decoder_set_fs( - silk_decoder_state *psDec, /* I/O Decoder state pointer */ - opus_int fs_kHz, /* I Sampling frequency (kHz) */ - opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */ -) -{ - opus_int frame_length, ret = 0; - - silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); - silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 ); - - /* New (sub)frame length */ - psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz ); - frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length ); - - /* Initialize resampler when switching internal or external sampling frequency */ - if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) { - /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */ - ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz, 0 ); - - psDec->fs_API_hz = fs_API_Hz; - } - - if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) { - if( fs_kHz == 8 ) { - if( psDec->nb_subfr == MAX_NB_SUBFR ) { - psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; - } else { - psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; - } - } else { - if( psDec->nb_subfr == MAX_NB_SUBFR ) { - psDec->pitch_contour_iCDF = silk_pitch_contour_iCDF; - } else { - psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; - } - } - if( psDec->fs_kHz != fs_kHz ) { - psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); - if( fs_kHz == 8 || fs_kHz == 12 ) { - psDec->LPC_order = MIN_LPC_ORDER; - psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB; - } else { - psDec->LPC_order = MAX_LPC_ORDER; - psDec->psNLSF_CB = &silk_NLSF_CB_WB; - } - if( fs_kHz == 16 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; - } else if( fs_kHz == 12 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; - } else if( fs_kHz == 8 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; - } else { - /* unsupported sampling rate */ - silk_assert( 0 ); - } - psDec->first_frame_after_reset = 1; - psDec->lagPrev = 100; - psDec->LastGainIndex = 10; - psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY; - silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf)); - silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) ); - } - - psDec->fs_kHz = fs_kHz; - psDec->frame_length = frame_length; - } - - /* Check that settings are valid */ - silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH ); - - return ret; -} - diff --git a/thirdparty/opus/silk/define.h b/thirdparty/opus/silk/define.h deleted file mode 100644 index 19c9b00e25..0000000000 --- a/thirdparty/opus/silk/define.h +++ /dev/null @@ -1,235 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_DEFINE_H -#define SILK_DEFINE_H - -#include "errors.h" -#include "typedef.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Max number of encoder channels (1/2) */ -#define ENCODER_NUM_CHANNELS 2 -/* Number of decoder channels (1/2) */ -#define DECODER_NUM_CHANNELS 2 - -#define MAX_FRAMES_PER_PACKET 3 - -/* Limits on bitrate */ -#define MIN_TARGET_RATE_BPS 5000 -#define MAX_TARGET_RATE_BPS 80000 -#define TARGET_RATE_TAB_SZ 8 - -/* LBRR thresholds */ -#define LBRR_NB_MIN_RATE_BPS 12000 -#define LBRR_MB_MIN_RATE_BPS 14000 -#define LBRR_WB_MIN_RATE_BPS 16000 - -/* DTX settings */ -#define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */ -#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */ - -/* Maximum sampling frequency */ -#define MAX_FS_KHZ 16 -#define MAX_API_FS_KHZ 48 - -/* Signal types */ -#define TYPE_NO_VOICE_ACTIVITY 0 -#define TYPE_UNVOICED 1 -#define TYPE_VOICED 2 - -/* Conditional coding types */ -#define CODE_INDEPENDENTLY 0 -#define CODE_INDEPENDENTLY_NO_LTP_SCALING 1 -#define CODE_CONDITIONALLY 2 - -/* Settings for stereo processing */ -#define STEREO_QUANT_TAB_SIZE 16 -#define STEREO_QUANT_SUB_STEPS 5 -#define STEREO_INTERP_LEN_MS 8 /* must be even */ -#define STEREO_RATIO_SMOOTH_COEF 0.01 /* smoothing coef for signal norms and stereo width */ - -/* Range of pitch lag estimates */ -#define PITCH_EST_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */ -#define PITCH_EST_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */ - -/* Maximum number of subframes */ -#define MAX_NB_SUBFR 4 - -/* Number of samples per frame */ -#define LTP_MEM_LENGTH_MS 20 -#define SUB_FRAME_LENGTH_MS 5 -#define MAX_SUB_FRAME_LENGTH ( SUB_FRAME_LENGTH_MS * MAX_FS_KHZ ) -#define MAX_FRAME_LENGTH_MS ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR ) -#define MAX_FRAME_LENGTH ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ ) - -/* Milliseconds of lookahead for pitch analysis */ -#define LA_PITCH_MS 2 -#define LA_PITCH_MAX ( LA_PITCH_MS * MAX_FS_KHZ ) - -/* Order of LPC used in find pitch */ -#define MAX_FIND_PITCH_LPC_ORDER 16 - -/* Length of LPC window used in find pitch */ -#define FIND_PITCH_LPC_WIN_MS ( 20 + (LA_PITCH_MS << 1) ) -#define FIND_PITCH_LPC_WIN_MS_2_SF ( 10 + (LA_PITCH_MS << 1) ) -#define FIND_PITCH_LPC_WIN_MAX ( FIND_PITCH_LPC_WIN_MS * MAX_FS_KHZ ) - -/* Milliseconds of lookahead for noise shape analysis */ -#define LA_SHAPE_MS 5 -#define LA_SHAPE_MAX ( LA_SHAPE_MS * MAX_FS_KHZ ) - -/* Maximum length of LPC window used in noise shape analysis */ -#define SHAPE_LPC_WIN_MAX ( 15 * MAX_FS_KHZ ) - -/* dB level of lowest gain quantization level */ -#define MIN_QGAIN_DB 2 -/* dB level of highest gain quantization level */ -#define MAX_QGAIN_DB 88 -/* Number of gain quantization levels */ -#define N_LEVELS_QGAIN 64 -/* Max increase in gain quantization index */ -#define MAX_DELTA_GAIN_QUANT 36 -/* Max decrease in gain quantization index */ -#define MIN_DELTA_GAIN_QUANT -4 - -/* Quantization offsets (multiples of 4) */ -#define OFFSET_VL_Q10 32 -#define OFFSET_VH_Q10 100 -#define OFFSET_UVL_Q10 100 -#define OFFSET_UVH_Q10 240 - -#define QUANT_LEVEL_ADJUST_Q10 80 - -/* Maximum numbers of iterations used to stabilize an LPC vector */ -#define MAX_LPC_STABILIZE_ITERATIONS 16 -#define MAX_PREDICTION_POWER_GAIN 1e4f -#define MAX_PREDICTION_POWER_GAIN_AFTER_RESET 1e2f - -#define MAX_LPC_ORDER 16 -#define MIN_LPC_ORDER 10 - -/* Find Pred Coef defines */ -#define LTP_ORDER 5 - -/* LTP quantization settings */ -#define NB_LTP_CBKS 3 - -/* Flag to use harmonic noise shaping */ -#define USE_HARM_SHAPING 1 - -/* Max LPC order of noise shaping filters */ -#define MAX_SHAPE_LPC_ORDER 16 - -#define HARM_SHAPE_FIR_TAPS 3 - -/* Maximum number of delayed decision states */ -#define MAX_DEL_DEC_STATES 4 - -#define LTP_BUF_LENGTH 512 -#define LTP_MASK ( LTP_BUF_LENGTH - 1 ) - -#define DECISION_DELAY 32 -#define DECISION_DELAY_MASK ( DECISION_DELAY - 1 ) - -/* Number of subframes for excitation entropy coding */ -#define SHELL_CODEC_FRAME_LENGTH 16 -#define LOG2_SHELL_CODEC_FRAME_LENGTH 4 -#define MAX_NB_SHELL_BLOCKS ( MAX_FRAME_LENGTH / SHELL_CODEC_FRAME_LENGTH ) - -/* Number of rate levels, for entropy coding of excitation */ -#define N_RATE_LEVELS 10 - -/* Maximum sum of pulses per shell coding frame */ -#define SILK_MAX_PULSES 16 - -#define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */ - -#if( MAX_LPC_ORDER > DECISION_DELAY ) -# define NSQ_LPC_BUF_LENGTH MAX_LPC_ORDER -#else -# define NSQ_LPC_BUF_LENGTH DECISION_DELAY -#endif - -/***************************/ -/* Voice activity detector */ -/***************************/ -#define VAD_N_BANDS 4 - -#define VAD_INTERNAL_SUBFRAMES_LOG2 2 -#define VAD_INTERNAL_SUBFRAMES ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 ) - -#define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 1024 /* Must be < 4096 */ -#define VAD_NOISE_LEVELS_BIAS 50 - -/* Sigmoid settings */ -#define VAD_NEGATIVE_OFFSET_Q5 128 /* sigmoid is 0 at -128 */ -#define VAD_SNR_FACTOR_Q16 45000 - -/* smoothing for SNR measurement */ -#define VAD_SNR_SMOOTH_COEF_Q18 4096 - -/* Size of the piecewise linear cosine approximation table for the LSFs */ -#define LSF_COS_TAB_SZ_FIX 128 - -/******************/ -/* NLSF quantizer */ -/******************/ -#define NLSF_W_Q 2 -#define NLSF_VQ_MAX_VECTORS 32 -#define NLSF_VQ_MAX_SURVIVORS 32 -#define NLSF_QUANT_MAX_AMPLITUDE 4 -#define NLSF_QUANT_MAX_AMPLITUDE_EXT 10 -#define NLSF_QUANT_LEVEL_ADJ 0.1 -#define NLSF_QUANT_DEL_DEC_STATES_LOG2 2 -#define NLSF_QUANT_DEL_DEC_STATES ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 ) - -/* Transition filtering for mode switching */ -#define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/ -#define TRANSITION_NB 3 /* Hardcoded in tables */ -#define TRANSITION_NA 2 /* Hardcoded in tables */ -#define TRANSITION_INT_NUM 5 /* Hardcoded in tables */ -#define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) -#define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) ) - -/* BWE factors to apply after packet loss */ -#define BWE_AFTER_LOSS_Q16 63570 - -/* Defines for CN generation */ -#define CNG_BUF_MASK_MAX 255 /* 2^floor(log2(MAX_FRAME_LENGTH))-1 */ -#define CNG_GAIN_SMTH_Q16 4634 /* 0.25^(1/4) */ -#define CNG_NLSF_SMTH_Q16 16348 /* 0.25 */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/enc_API.c b/thirdparty/opus/silk/enc_API.c deleted file mode 100644 index f8060286db..0000000000 --- a/thirdparty/opus/silk/enc_API.c +++ /dev/null @@ -1,563 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#include "define.h" -#include "API.h" -#include "control.h" -#include "typedef.h" -#include "stack_alloc.h" -#include "structs.h" -#include "tuning_parameters.h" -#ifdef FIXED_POINT -#include "main_FIX.h" -#else -#include "main_FLP.h" -#endif - -/***************************************/ -/* Read control structure from encoder */ -/***************************************/ -static opus_int silk_QueryEncoder( /* O Returns error code */ - const void *encState, /* I State */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -); - -/****************************************/ -/* Encoder functions */ -/****************************************/ - -opus_int silk_Get_Encoder_Size( /* O Returns error code */ - opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ -) -{ - opus_int ret = SILK_NO_ERROR; - - *encSizeBytes = sizeof( silk_encoder ); - - return ret; -} - -/*************************/ -/* Init or Reset encoder */ -/*************************/ -opus_int silk_InitEncoder( /* O Returns error code */ - void *encState, /* I/O State */ - int arch, /* I Run-time architecture */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -) -{ - silk_encoder *psEnc; - opus_int n, ret = SILK_NO_ERROR; - - psEnc = (silk_encoder *)encState; - - /* Reset encoder */ - silk_memset( psEnc, 0, sizeof( silk_encoder ) ); - for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { - if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { - silk_assert( 0 ); - } - } - - psEnc->nChannelsAPI = 1; - psEnc->nChannelsInternal = 1; - - /* Read control structure */ - if( ret += silk_QueryEncoder( encState, encStatus ) ) { - silk_assert( 0 ); - } - - return ret; -} - -/***************************************/ -/* Read control structure from encoder */ -/***************************************/ -static opus_int silk_QueryEncoder( /* O Returns error code */ - const void *encState, /* I State */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -) -{ - opus_int ret = SILK_NO_ERROR; - silk_encoder_state_Fxx *state_Fxx; - silk_encoder *psEnc = (silk_encoder *)encState; - - state_Fxx = psEnc->state_Fxx; - - encStatus->nChannelsAPI = psEnc->nChannelsAPI; - encStatus->nChannelsInternal = psEnc->nChannelsInternal; - encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz; - encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz; - encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz; - encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz; - encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms; - encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps; - encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc; - encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity; - encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC; - encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX; - encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR; - encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); - encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch; - encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0; - - return ret; -} - - -/**************************/ -/* Encode frame with Silk */ -/**************************/ -/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ -/* encControl->payloadSize_ms is set to */ -opus_int silk_Encode( /* O Returns error code */ - void *encState, /* I/O State */ - silk_EncControlStruct *encControl, /* I Control status */ - const opus_int16 *samplesIn, /* I Speech sample input vector */ - opus_int nSamplesIn, /* I Number of samples in input vector */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ - const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ -) -{ - opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; - opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms; - opus_int nSamplesFromInput = 0, nSamplesFromInputMax; - opus_int speech_act_thr_for_switch_Q8; - opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; - silk_encoder *psEnc = ( silk_encoder * )encState; - VARDECL( opus_int16, buf ); - opus_int transition, curr_block, tot_blocks; - SAVE_STACK; - - if (encControl->reducedDependency) - { - psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1; - psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1; - } - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; - - /* Check values in encoder control structure */ - if( ( ret = check_control_input( encControl ) ) != 0 ) { - silk_assert( 0 ); - RESTORE_STACK; - return ret; - } - - encControl->switchReady = 0; - - if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { - /* Mono -> Stereo transition: init state of second channel and stereo state */ - ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); - silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); - silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); - psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; - psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; - psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; - psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; - psEnc->sStereo.width_prev_Q14 = 0; - psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); - if( psEnc->nChannelsAPI == 2 ) { - silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) ); - silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) ); - } - } - - transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal); - - psEnc->nChannelsAPI = encControl->nChannelsAPI; - psEnc->nChannelsInternal = encControl->nChannelsInternal; - - nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate ); - tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1; - curr_block = 0; - if( prefillFlag ) { - /* Only accept input length of 10 ms */ - if( nBlocksOf10ms != 1 ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; - } - /* Reset Encoder */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); - silk_assert( !ret ); - } - tmp_payloadSize_ms = encControl->payloadSize_ms; - encControl->payloadSize_ms = 10; - tmp_complexity = encControl->complexity; - encControl->complexity = 0; - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; - psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1; - } - } else { - /* Only accept input lengths that are a multiple of 10 ms */ - if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; - } - /* Make sure no more than one packet can be produced */ - if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; - } - } - - TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 ); - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - /* Force the side channel to the same rate as the mid */ - opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0; - if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { - silk_assert( 0 ); - RESTORE_STACK; - return ret; - } - if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) { - for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { - psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0; - } - } - psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; - } - silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); - - /* Input buffering/resampling and encoding */ - nSamplesToBufferMax = - 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; - nSamplesFromInputMax = - silk_DIV32_16( nSamplesToBufferMax * - psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, - psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); - ALLOC( buf, nSamplesFromInputMax, opus_int16 ); - while( 1 ) { - nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; - nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); - nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); - /* Resample and write to buffer */ - if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { - opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; - for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n ]; - } - /* Making sure to start both resamplers from the same state when switching from mono to stereo */ - if( psEnc->nPrevChannelsInternal == 1 && id==0 ) { - silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); - } - - ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; - - nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; - nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); - for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n + 1 ]; - } - ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; - } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { - /* Combine left and right channels before resampling */ - for( n = 0; n < nSamplesFromInput; n++ ) { - sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ]; - buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); - } - ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - /* On the first mono frame, average the results for the two resampler states */ - if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) { - ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) { - psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = - silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] - + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); - } - } - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; - } else { - silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); - silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16)); - ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; - } - - samplesIn += nSamplesFromInput * encControl->nChannelsAPI; - nSamplesIn -= nSamplesFromInput; - - /* Default */ - psEnc->allowBandwidthSwitch = 0; - - /* Silk encoder */ - if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { - /* Enough data in input buffer, so encode */ - silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); - silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); - - /* Deal with LBRR data */ - if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { - /* Create space at start of payload for VAD and FEC flags */ - opus_uint8 iCDF[ 2 ] = { 0, 0 }; - iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); - ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); - - /* Encode any LBRR data from previous packet */ - /* Encode LBRR flags */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - LBRR_symbol = 0; - for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { - LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i ); - } - psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0; - if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) { - ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 ); - } - } - - /* Code LBRR indices and excitation signals */ - for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) { - opus_int condCoding; - - if( encControl->nChannelsInternal == 2 && n == 0 ) { - silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] ); - /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */ - if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) { - silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] ); - } - } - /* Use conditional coding if previous frame available */ - if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) { - condCoding = CODE_CONDITIONALLY; - } else { - condCoding = CODE_INDEPENDENTLY; - } - silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding ); - silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, - psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length ); - } - } - } - - /* Reset LBRR flags */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); - } - - psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc ); - } - - silk_HP_variable_cutoff( psEnc->state_Fxx ); - - /* Total target bits for packet */ - nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); - /* Subtract bits used for LBRR */ - if( !prefillFlag ) { - nBits -= psEnc->nBitsUsedLBRR; - } - /* Divide by number of uncoded frames left in packet */ - nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket ); - /* Convert to bits/second */ - if( encControl->payloadSize_ms == 10 ) { - TargetRate_bps = silk_SMULBB( nBits, 100 ); - } else { - TargetRate_bps = silk_SMULBB( nBits, 50 ); - } - /* Subtract fraction of bits in excess of target in previous frames and packets */ - TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); - if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) { - /* Compare actual vs target bits so far in this packet */ - opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; - TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); - } - /* Never exceed input bitrate */ - TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); - - /* Convert Left/Right to Mid/Side */ - if( encControl->nChannelsInternal == 2 ) { - silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], - psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], - MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, - psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); - if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { - /* Reset side channel encoder memory for first frame with side coding */ - if( psEnc->prev_decode_only_middle == 1 ) { - silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); - silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) ); - silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); - silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); - silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); - psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; - psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; - psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; - psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; - psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; - psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; - } - silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] ); - } else { - psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; - } - if( !prefillFlag ) { - silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); - if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { - silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); - } - } - } else { - /* Buffering */ - silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); - } - silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] ); - - /* Encode */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - opus_int maxBits, useCBR; - - /* Handling rate constraints */ - maxBits = encControl->maxBits; - if( tot_blocks == 2 && curr_block == 0 ) { - maxBits = maxBits * 3 / 5; - } else if( tot_blocks == 3 ) { - if( curr_block == 0 ) { - maxBits = maxBits * 2 / 5; - } else if( curr_block == 1 ) { - maxBits = maxBits * 3 / 4; - } - } - useCBR = encControl->useCBR && curr_block == tot_blocks - 1; - - if( encControl->nChannelsInternal == 1 ) { - channelRate_bps = TargetRate_bps; - } else { - channelRate_bps = MStargetRates_bps[ n ]; - if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) { - useCBR = 0; - /* Give mid up to 1/2 of the max bits for that frame */ - maxBits -= encControl->maxBits / ( tot_blocks * 2 ); - } - } - - if( channelRate_bps > 0 ) { - opus_int condCoding; - - silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps ); - - /* Use independent coding if no previous frame available */ - if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) { - condCoding = CODE_INDEPENDENTLY; - } else if( n > 0 && psEnc->prev_decode_only_middle ) { - /* If we skipped a side frame in this packet, we don't - need LTP scaling; the LTP state is well-defined. */ - condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; - } else { - condCoding = CODE_CONDITIONALLY; - } - if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) { - silk_assert( 0 ); - } - } - psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; - psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0; - psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++; - } - psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ]; - - /* Insert VAD and FEC flags at beginning of bitstream */ - if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) { - flags = 0; - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { - flags = silk_LSHIFT( flags, 1 ); - flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ]; - } - flags = silk_LSHIFT( flags, 1 ); - flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; - } - if( !prefillFlag ) { - ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); - } - - /* Return zero bytes if all channels DTXed */ - if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { - *nBytesOut = 0; - } - - psEnc->nBitsExceeded += *nBytesOut * 8; - psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); - psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 ); - - /* Update flag indicating if bandwidth switching is allowed */ - speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ), - SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms ); - if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) { - psEnc->allowBandwidthSwitch = 1; - psEnc->timeSinceSwitchAllowed_ms = 0; - } else { - psEnc->allowBandwidthSwitch = 0; - psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms; - } - } - - if( nSamplesIn == 0 ) { - break; - } - } else { - break; - } - curr_block++; - } - - psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; - - encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; - encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0; - encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); - encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14; - if( prefillFlag ) { - encControl->payloadSize_ms = tmp_payloadSize_ms; - encControl->complexity = tmp_complexity; - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; - psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0; - } - } - - RESTORE_STACK; - return ret; -} - diff --git a/thirdparty/opus/silk/encode_indices.c b/thirdparty/opus/silk/encode_indices.c deleted file mode 100644 index 666c8c0b13..0000000000 --- a/thirdparty/opus/silk/encode_indices.c +++ /dev/null @@ -1,181 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Encode side-information parameters to payload */ -void silk_encode_indices( - silk_encoder_state *psEncC, /* I/O Encoder state */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i, k, typeOffset; - opus_int encode_absolute_lagIndex, delta_lagIndex; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - const SideInfoIndices *psIndices; - - if( encode_LBRR ) { - psIndices = &psEncC->indices_LBRR[ FrameIndex ]; - } else { - psIndices = &psEncC->indices; - } - - /*******************************************/ - /* Encode signal type and quantizer offset */ - /*******************************************/ - typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType; - silk_assert( typeOffset >= 0 && typeOffset < 6 ); - silk_assert( encode_LBRR == 0 || typeOffset >= 2 ); - if( encode_LBRR || typeOffset >= 2 ) { - ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 ); - } else { - ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 ); - } - - /****************/ - /* Encode gains */ - /****************/ - /* first subframe */ - if( condCoding == CODE_CONDITIONALLY ) { - /* conditional coding */ - silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ); - ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ], silk_delta_gain_iCDF, 8 ); - } else { - /* independent coding, in two stages: MSB bits followed by 3 LSBs */ - silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < N_LEVELS_QGAIN ); - ec_enc_icdf( psRangeEnc, silk_RSHIFT( psIndices->GainsIndices[ 0 ], 3 ), silk_gain_iCDF[ psIndices->signalType ], 8 ); - ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ] & 7, silk_uniform8_iCDF, 8 ); - } - - /* remaining subframes */ - for( i = 1; i < psEncC->nb_subfr; i++ ) { - silk_assert( psIndices->GainsIndices[ i ] >= 0 && psIndices->GainsIndices[ i ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ); - ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 ); - } - - /****************/ - /* Encode NLSFs */ - /****************/ - ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 ); - silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] ); - silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder ); - for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) { - if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) { - ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 ); - } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) { - ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 ); - } else { - ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] + NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - } - } - - /* Encode NLSF interpolation factor */ - if( psEncC->nb_subfr == MAX_NB_SUBFR ) { - silk_assert( psIndices->NLSFInterpCoef_Q2 >= 0 && psIndices->NLSFInterpCoef_Q2 < 5 ); - ec_enc_icdf( psRangeEnc, psIndices->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 ); - } - - if( psIndices->signalType == TYPE_VOICED ) - { - /*********************/ - /* Encode pitch lags */ - /*********************/ - /* lag index */ - encode_absolute_lagIndex = 1; - if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) { - /* Delta Encoding */ - delta_lagIndex = psIndices->lagIndex - psEncC->ec_prevLagIndex; - if( delta_lagIndex < -8 || delta_lagIndex > 11 ) { - delta_lagIndex = 0; - } else { - delta_lagIndex = delta_lagIndex + 9; - encode_absolute_lagIndex = 0; /* Only use delta */ - } - silk_assert( delta_lagIndex >= 0 && delta_lagIndex < 21 ); - ec_enc_icdf( psRangeEnc, delta_lagIndex, silk_pitch_delta_iCDF, 8 ); - } - if( encode_absolute_lagIndex ) { - /* Absolute encoding */ - opus_int32 pitch_high_bits, pitch_low_bits; - pitch_high_bits = silk_DIV32_16( psIndices->lagIndex, silk_RSHIFT( psEncC->fs_kHz, 1 ) ); - pitch_low_bits = psIndices->lagIndex - silk_SMULBB( pitch_high_bits, silk_RSHIFT( psEncC->fs_kHz, 1 ) ); - silk_assert( pitch_low_bits < psEncC->fs_kHz / 2 ); - silk_assert( pitch_high_bits < 32 ); - ec_enc_icdf( psRangeEnc, pitch_high_bits, silk_pitch_lag_iCDF, 8 ); - ec_enc_icdf( psRangeEnc, pitch_low_bits, psEncC->pitch_lag_low_bits_iCDF, 8 ); - } - psEncC->ec_prevLagIndex = psIndices->lagIndex; - - /* Countour index */ - silk_assert( psIndices->contourIndex >= 0 ); - silk_assert( ( psIndices->contourIndex < 34 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 4 ) || - ( psIndices->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) || - ( psIndices->contourIndex < 12 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 2 ) || - ( psIndices->contourIndex < 3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) ); - ec_enc_icdf( psRangeEnc, psIndices->contourIndex, psEncC->pitch_contour_iCDF, 8 ); - - /********************/ - /* Encode LTP gains */ - /********************/ - /* PERIndex value */ - silk_assert( psIndices->PERIndex >= 0 && psIndices->PERIndex < 3 ); - ec_enc_icdf( psRangeEnc, psIndices->PERIndex, silk_LTP_per_index_iCDF, 8 ); - - /* Codebook Indices */ - for( k = 0; k < psEncC->nb_subfr; k++ ) { - silk_assert( psIndices->LTPIndex[ k ] >= 0 && psIndices->LTPIndex[ k ] < ( 8 << psIndices->PERIndex ) ); - ec_enc_icdf( psRangeEnc, psIndices->LTPIndex[ k ], silk_LTP_gain_iCDF_ptrs[ psIndices->PERIndex ], 8 ); - } - - /**********************/ - /* Encode LTP scaling */ - /**********************/ - if( condCoding == CODE_INDEPENDENTLY ) { - silk_assert( psIndices->LTP_scaleIndex >= 0 && psIndices->LTP_scaleIndex < 3 ); - ec_enc_icdf( psRangeEnc, psIndices->LTP_scaleIndex, silk_LTPscale_iCDF, 8 ); - } - silk_assert( !condCoding || psIndices->LTP_scaleIndex == 0 ); - } - - psEncC->ec_prevSignalType = psIndices->signalType; - - /***************/ - /* Encode seed */ - /***************/ - silk_assert( psIndices->Seed >= 0 && psIndices->Seed < 4 ); - ec_enc_icdf( psRangeEnc, psIndices->Seed, silk_uniform4_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/encode_pulses.c b/thirdparty/opus/silk/encode_pulses.c deleted file mode 100644 index ab00264f99..0000000000 --- a/thirdparty/opus/silk/encode_pulses.c +++ /dev/null @@ -1,206 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/*********************************************/ -/* Encode quantization indices of excitation */ -/*********************************************/ - -static OPUS_INLINE opus_int combine_and_check( /* return ok */ - opus_int *pulses_comb, /* O */ - const opus_int *pulses_in, /* I */ - opus_int max_pulses, /* I max value for sum of pulses */ - opus_int len /* I number of output values */ -) -{ - opus_int k, sum; - - for( k = 0; k < len; k++ ) { - sum = pulses_in[ 2 * k ] + pulses_in[ 2 * k + 1 ]; - if( sum > max_pulses ) { - return 1; - } - pulses_comb[ k ] = sum; - } - - return 0; -} - -/* Encode quantization indices of excitation */ -void silk_encode_pulses( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I quantOffsetType */ - opus_int8 pulses[], /* I quantization indices */ - const opus_int frame_length /* I Frame length */ -) -{ - opus_int i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0; - opus_int32 abs_q, minSumBits_Q5, sumBits_Q5; - VARDECL( opus_int, abs_pulses ); - VARDECL( opus_int, sum_pulses ); - VARDECL( opus_int, nRshifts ); - opus_int pulses_comb[ 8 ]; - opus_int *abs_pulses_ptr; - const opus_int8 *pulses_ptr; - const opus_uint8 *cdf_ptr; - const opus_uint8 *nBits_ptr; - SAVE_STACK; - - silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/ - - /****************************/ - /* Prepare for shell coding */ - /****************************/ - /* Calculate number of shell blocks */ - silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); - iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); - if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { - silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ - iter++; - silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8)); - } - - /* Take the absolute value of the pulses */ - ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int ); - silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) ); - for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) { - abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] ); - abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] ); - abs_pulses[i+2] = ( opus_int )silk_abs( pulses[ i + 2 ] ); - abs_pulses[i+3] = ( opus_int )silk_abs( pulses[ i + 3 ] ); - } - - /* Calc sum pulses per shell code frame */ - ALLOC( sum_pulses, iter, opus_int ); - ALLOC( nRshifts, iter, opus_int ); - abs_pulses_ptr = abs_pulses; - for( i = 0; i < iter; i++ ) { - nRshifts[ i ] = 0; - - while( 1 ) { - /* 1+1 -> 2 */ - scale_down = combine_and_check( pulses_comb, abs_pulses_ptr, silk_max_pulses_table[ 0 ], 8 ); - /* 2+2 -> 4 */ - scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 1 ], 4 ); - /* 4+4 -> 8 */ - scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 2 ], 2 ); - /* 8+8 -> 16 */ - scale_down += combine_and_check( &sum_pulses[ i ], pulses_comb, silk_max_pulses_table[ 3 ], 1 ); - - if( scale_down ) { - /* We need to downscale the quantization signal */ - nRshifts[ i ]++; - for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { - abs_pulses_ptr[ k ] = silk_RSHIFT( abs_pulses_ptr[ k ], 1 ); - } - } else { - /* Jump out of while(1) loop and go to next shell coding frame */ - break; - } - } - abs_pulses_ptr += SHELL_CODEC_FRAME_LENGTH; - } - - /**************/ - /* Rate level */ - /**************/ - /* find rate level that leads to fewest bits for coding of pulses per block info */ - minSumBits_Q5 = silk_int32_MAX; - for( k = 0; k < N_RATE_LEVELS - 1; k++ ) { - nBits_ptr = silk_pulses_per_block_BITS_Q5[ k ]; - sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ]; - for( i = 0; i < iter; i++ ) { - if( nRshifts[ i ] > 0 ) { - sumBits_Q5 += nBits_ptr[ SILK_MAX_PULSES + 1 ]; - } else { - sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ]; - } - } - if( sumBits_Q5 < minSumBits_Q5 ) { - minSumBits_Q5 = sumBits_Q5; - RateLevelIndex = k; - } - } - ec_enc_icdf( psRangeEnc, RateLevelIndex, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); - - /***************************************************/ - /* Sum-Weighted-Pulses Encoding */ - /***************************************************/ - cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ]; - for( i = 0; i < iter; i++ ) { - if( nRshifts[ i ] == 0 ) { - ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 ); - } else { - ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, cdf_ptr, 8 ); - for( k = 0; k < nRshifts[ i ] - 1; k++ ) { - ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); - } - ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); - } - } - - /******************/ - /* Shell Encoding */ - /******************/ - for( i = 0; i < iter; i++ ) { - if( sum_pulses[ i ] > 0 ) { - silk_shell_encoder( psRangeEnc, &abs_pulses[ i * SHELL_CODEC_FRAME_LENGTH ] ); - } - } - - /****************/ - /* LSB Encoding */ - /****************/ - for( i = 0; i < iter; i++ ) { - if( nRshifts[ i ] > 0 ) { - pulses_ptr = &pulses[ i * SHELL_CODEC_FRAME_LENGTH ]; - nLS = nRshifts[ i ] - 1; - for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { - abs_q = (opus_int8)silk_abs( pulses_ptr[ k ] ); - for( j = nLS; j > 0; j-- ) { - bit = silk_RSHIFT( abs_q, j ) & 1; - ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 ); - } - bit = abs_q & 1; - ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 ); - } - } - } - - /****************/ - /* Encode signs */ - /****************/ - silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/errors.h b/thirdparty/opus/silk/errors.h deleted file mode 100644 index 45070800f2..0000000000 --- a/thirdparty/opus/silk/errors.h +++ /dev/null @@ -1,98 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_ERRORS_H -#define SILK_ERRORS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/******************/ -/* Error messages */ -/******************/ -#define SILK_NO_ERROR 0 - -/**************************/ -/* Encoder error messages */ -/**************************/ - -/* Input length is not a multiple of 10 ms, or length is longer than the packet length */ -#define SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES -101 - -/* Sampling frequency not 8000, 12000 or 16000 Hertz */ -#define SILK_ENC_FS_NOT_SUPPORTED -102 - -/* Packet size not 10, 20, 40, or 60 ms */ -#define SILK_ENC_PACKET_SIZE_NOT_SUPPORTED -103 - -/* Allocated payload buffer too short */ -#define SILK_ENC_PAYLOAD_BUF_TOO_SHORT -104 - -/* Loss rate not between 0 and 100 percent */ -#define SILK_ENC_INVALID_LOSS_RATE -105 - -/* Complexity setting not valid, use 0...10 */ -#define SILK_ENC_INVALID_COMPLEXITY_SETTING -106 - -/* Inband FEC setting not valid, use 0 or 1 */ -#define SILK_ENC_INVALID_INBAND_FEC_SETTING -107 - -/* DTX setting not valid, use 0 or 1 */ -#define SILK_ENC_INVALID_DTX_SETTING -108 - -/* CBR setting not valid, use 0 or 1 */ -#define SILK_ENC_INVALID_CBR_SETTING -109 - -/* Internal encoder error */ -#define SILK_ENC_INTERNAL_ERROR -110 - -/* Internal encoder error */ -#define SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR -111 - -/**************************/ -/* Decoder error messages */ -/**************************/ - -/* Output sampling frequency lower than internal decoded sampling frequency */ -#define SILK_DEC_INVALID_SAMPLING_FREQUENCY -200 - -/* Payload size exceeded the maximum allowed 1024 bytes */ -#define SILK_DEC_PAYLOAD_TOO_LARGE -201 - -/* Payload has bit errors */ -#define SILK_DEC_PAYLOAD_ERROR -202 - -/* Payload has bit errors */ -#define SILK_DEC_INVALID_FRAME_SIZE -203 - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c b/thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c deleted file mode 100644 index 5574e7069f..0000000000 --- a/thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c +++ /dev/null @@ -1,90 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -void silk_LTP_analysis_filter_FIX( - opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */ - const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */ - const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */ -) -{ - const opus_int16 *x_ptr, *x_lag_ptr; - opus_int16 Btmp_Q14[ LTP_ORDER ]; - opus_int16 *LTP_res_ptr; - opus_int k, i; - opus_int32 LTP_est; - - x_ptr = x; - LTP_res_ptr = LTP_res; - for( k = 0; k < nb_subfr; k++ ) { - - x_lag_ptr = x_ptr - pitchL[ k ]; - - Btmp_Q14[ 0 ] = LTPCoef_Q14[ k * LTP_ORDER ]; - Btmp_Q14[ 1 ] = LTPCoef_Q14[ k * LTP_ORDER + 1 ]; - Btmp_Q14[ 2 ] = LTPCoef_Q14[ k * LTP_ORDER + 2 ]; - Btmp_Q14[ 3 ] = LTPCoef_Q14[ k * LTP_ORDER + 3 ]; - Btmp_Q14[ 4 ] = LTPCoef_Q14[ k * LTP_ORDER + 4 ]; - - /* LTP analysis FIR filter */ - for( i = 0; i < subfr_length + pre_length; i++ ) { - LTP_res_ptr[ i ] = x_ptr[ i ]; - - /* Long-term prediction */ - LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 1 ], Btmp_Q14[ 1 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 0 ], Btmp_Q14[ 2 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -1 ], Btmp_Q14[ 3 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -2 ], Btmp_Q14[ 4 ] ); - - LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/ - - /* Subtract long-term prediction */ - LTP_res_ptr[ i ] = (opus_int16)silk_SAT16( (opus_int32)x_ptr[ i ] - LTP_est ); - - /* Scale residual */ - LTP_res_ptr[ i ] = silk_SMULWB( invGains_Q16[ k ], LTP_res_ptr[ i ] ); - - x_lag_ptr++; - } - - /* Update pointers */ - LTP_res_ptr += subfr_length + pre_length; - x_ptr += subfr_length; - } -} - diff --git a/thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c deleted file mode 100644 index 3dcedef891..0000000000 --- a/thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -/* Calculation of LTP state scaling */ -void silk_LTP_scale_ctrl_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int round_loss; - - if( condCoding == CODE_INDEPENDENTLY ) { - /* Only scale if first frame in packet */ - round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; - psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( - silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 ); - } else { - /* Default is minimum scaling */ - psEnc->sCmn.indices.LTP_scaleIndex = 0; - } - psEncCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ]; -} diff --git a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c b/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c deleted file mode 100644 index 4502b7130e..0000000000 --- a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c +++ /dev/null @@ -1,101 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Apply sine window to signal vector. */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -/* Every other sample is linearly interpolated, for speed. */ -/* Window length must be between 16 and 120 (incl) and a multiple of 4. */ - -/* Matlab code for table: - for k=16:9*4:16+2*9*4, fprintf(' %7.d,', -round(65536*pi ./ (k:4:k+8*4))); fprintf('\n'); end -*/ -static const opus_int16 freq_table_Q16[ 27 ] = { - 12111, 9804, 8235, 7100, 6239, 5565, 5022, 4575, 4202, - 3885, 3612, 3375, 3167, 2984, 2820, 2674, 2542, 2422, - 2313, 2214, 2123, 2038, 1961, 1889, 1822, 1760, 1702, -}; - -void silk_apply_sine_window( - opus_int16 px_win[], /* O Pointer to windowed signal */ - const opus_int16 px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -) -{ - opus_int k, f_Q16, c_Q16; - opus_int32 S0_Q16, S1_Q16; - - silk_assert( win_type == 1 || win_type == 2 ); - - /* Length must be in a range from 16 to 120 and a multiple of 4 */ - silk_assert( length >= 16 && length <= 120 ); - silk_assert( ( length & 3 ) == 0 ); - - /* Frequency */ - k = ( length >> 2 ) - 4; - silk_assert( k >= 0 && k <= 26 ); - f_Q16 = (opus_int)freq_table_Q16[ k ]; - - /* Factor used for cosine approximation */ - c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 ); - silk_assert( c_Q16 >= -32768 ); - - /* initialize state */ - if( win_type == 1 ) { - /* start from 0 */ - S0_Q16 = 0; - /* approximation of sin(f) */ - S1_Q16 = f_Q16 + silk_RSHIFT( length, 3 ); - } else { - /* start from 1 */ - S0_Q16 = ( (opus_int32)1 << 16 ); - /* approximation of cos(f) */ - S1_Q16 = ( (opus_int32)1 << 16 ) + silk_RSHIFT( c_Q16, 1 ) + silk_RSHIFT( length, 4 ); - } - - /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */ - /* 4 samples at a time */ - for( k = 0; k < length; k += 4 ) { - px_win[ k ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k ] ); - px_win[ k + 1 ] = (opus_int16)silk_SMULWB( S1_Q16, px[ k + 1] ); - S0_Q16 = silk_SMULWB( S1_Q16, c_Q16 ) + silk_LSHIFT( S1_Q16, 1 ) - S0_Q16 + 1; - S0_Q16 = silk_min( S0_Q16, ( (opus_int32)1 << 16 ) ); - - px_win[ k + 2 ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k + 2] ); - px_win[ k + 3 ] = (opus_int16)silk_SMULWB( S0_Q16, px[ k + 3 ] ); - S1_Q16 = silk_SMULWB( S0_Q16, c_Q16 ) + silk_LSHIFT( S0_Q16, 1 ) - S1_Q16; - S1_Q16 = silk_min( S1_Q16, ( (opus_int32)1 << 16 ) ); - } -} diff --git a/thirdparty/opus/silk/fixed/autocorr_FIX.c b/thirdparty/opus/silk/fixed/autocorr_FIX.c deleted file mode 100644 index de95c98693..0000000000 --- a/thirdparty/opus/silk/fixed/autocorr_FIX.c +++ /dev/null @@ -1,48 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "celt_lpc.h" - -/* Compute autocorrelation */ -void silk_autocorr( - opus_int32 *results, /* O Result (length correlationCount) */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *inputData, /* I Input data to correlate */ - const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount, /* I Number of correlation taps to compute */ - int arch /* I Run-time architecture */ -) -{ - opus_int corrCount; - corrCount = silk_min_int( inputDataSize, correlationCount ); - *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch); -} diff --git a/thirdparty/opus/silk/fixed/burg_modified_FIX.c b/thirdparty/opus/silk/fixed/burg_modified_FIX.c deleted file mode 100644 index 17d0e0993c..0000000000 --- a/thirdparty/opus/silk/fixed/burg_modified_FIX.c +++ /dev/null @@ -1,280 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "define.h" -#include "tuning_parameters.h" -#include "pitch.h" - -#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ - -#define QA 25 -#define N_BITS_HEAD_ROOM 2 -#define MIN_RSHIFTS -16 -#define MAX_RSHIFTS (32 - QA) - -/* Compute reflection coefficients from input signal */ -void silk_burg_modified_c( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -) -{ - opus_int k, n, s, lz, rshifts, reached_max_gain; - opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; - const opus_int16 *x_ptr; - opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; - opus_int64 C0_64; - - silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); - - /* Compute autocorrelations, added over subframes */ - C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); - lz = silk_CLZ64(C0_64); - rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; - if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; - if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS; - - if (rshifts > 0) { - C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts ); - } else { - C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts ); - } - - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - int i; - opus_int32 d; - x_ptr = x + s * subfr_length; - celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); - for( n = 1; n < D + 1; n++ ) { - for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) - d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); - xcorr[ n - 1 ] += d; - } - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); - } - } - } - silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - - /* Initialize */ - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - - invGain_Q30 = (opus_int32)1 << 30; - reached_max_gain = 0; - for( n = 0; n < D; n++ ) { - /* Update first row of correlation matrix (without first element) */ - /* Update last row of correlation matrix (without last element, stored in reversed order) */ - /* Update C * Af */ - /* Update C * flipud(Af) (stored in reversed order) */ - if( rshifts > -2 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ - for( k = 0; k < n; k++ ) { - C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp_QA = Af_QA[ k ]; - tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ - tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ - } - tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ - tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ - for( k = 0; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ - for( k = 0; k < n; k++ ) { - C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32), - but they cancel each other and the real result seems to always fit in a 32-bit - signed integer. This was determined experimentally, not theoretically (unfortunately). */ - tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ - } - tmp1 = -tmp1; /* Q17 */ - tmp2 = -tmp2; /* Q17 */ - for( k = 0; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, - silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, - silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ - } - } - } - - /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ - tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ - tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ - num = 0; /* Q( -rshifts ) */ - nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ - for( k = 0; k < n; k++ ) { - Atmp_QA = Af_QA[ k ]; - lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; - lz = silk_min( 32 - QA, lz ); - Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ - - tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), - Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ - } - CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ - CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ - num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ - num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ - - /* Calculate the next order reflection (parcor) coefficient */ - if( silk_abs( num ) < nrg ) { - rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); - } else { - rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; - } - - /* Update inverse prediction gain */ - tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); - if( tmp1 <= minInvGain_Q30 ) { - /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ - tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ - rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ - if( rc_Q31 > 0 ) { - /* Newton-Raphson iteration */ - rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ - rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ - if( num < 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc_Q31 = -rc_Q31; - } - } - invGain_Q30 = minInvGain_Q30; - reached_max_gain = 1; - } else { - invGain_Q30 = tmp1; - } - - /* Update the AR coefficients */ - for( k = 0; k < (n + 1) >> 1; k++ ) { - tmp1 = Af_QA[ k ]; /* QA */ - tmp2 = Af_QA[ n - k - 1 ]; /* QA */ - Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ - Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ - } - Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ - - if( reached_max_gain ) { - /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ - for( k = n + 1; k < D; k++ ) { - Af_QA[ k ] = 0; - } - break; - } - - /* Update C * Af and C * Ab */ - for( k = 0; k <= n + 1; k++ ) { - tmp1 = CAf[ k ]; /* Q( -rshifts ) */ - tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ - CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - } - } - - if( reached_max_gain ) { - for( k = 0; k < D; k++ ) { - /* Scale coefficients */ - A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); - } - /* Subtract energy of preceding samples from C0 */ - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts); - } - } - /* Approximate residual energy */ - *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); - *res_nrg_Q = -rshifts; - } else { - /* Return residual energy */ - nrg = CAf[ 0 ]; /* Q( -rshifts ) */ - tmp1 = (opus_int32)1 << 16; /* Q16 */ - for( k = 0; k < D; k++ ) { - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ - nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ - tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ - A_Q16[ k ] = -Atmp1; - } - *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ - *res_nrg_Q = -rshifts; - } -} diff --git a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c b/thirdparty/opus/silk/fixed/corrMatrix_FIX.c deleted file mode 100644 index c1d437c785..0000000000 --- a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c +++ /dev/null @@ -1,158 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/********************************************************************** - * Correlation Matrix Computations for LS estimate. - **********************************************************************/ - -#include "main_FIX.h" - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int16 *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ - const opus_int rshifts, /* I Right shifts of correlations */ - int arch /* I Run-time architecture */ -) -{ - opus_int lag, i; - const opus_int16 *ptr1, *ptr2; - opus_int32 inner_prod; - - ptr1 = &x[ order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */ - ptr2 = t; - /* Calculate X'*t */ - if( rshifts > 0 ) { - /* Right shifting used */ - for( lag = 0; lag < order; lag++ ) { - inner_prod = 0; - for( i = 0; i < L; i++ ) { - inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); - } - Xt[ lag ] = inner_prod; /* X[:,lag]'*t */ - ptr1--; /* Go to next column of X */ - } - } else { - silk_assert( rshifts == 0 ); - for( lag = 0; lag < order; lag++ ) { - Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); /* X[:,lag]'*t */ - ptr1--; /* Go to next column of X */ - } - } -} - -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - const opus_int head_room, /* I Desired headroom */ - opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int *rshifts, /* I/O Right shifts of correlations */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, j, lag, rshifts_local, head_room_rshifts; - opus_int32 energy; - const opus_int16 *ptr1, *ptr2; - - /* Calculate energy to find shift used to fit in 32 bits */ - silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 ); - /* Add shifts to get the desired head room */ - head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 ); - - energy = silk_RSHIFT32( energy, head_room_rshifts ); - rshifts_local += head_room_rshifts; - - /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */ - /* Remove contribution of first order - 1 samples */ - for( i = 0; i < order - 1; i++ ) { - energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local ); - } - if( rshifts_local < *rshifts ) { - /* Adjust energy */ - energy = silk_RSHIFT32( energy, *rshifts - rshifts_local ); - rshifts_local = *rshifts; - } - - /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */ - /* Fill out the diagonal of the correlation matrix */ - matrix_ptr( XX, 0, 0, order ) = energy; - ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */ - for( j = 1; j < order; j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) ); - matrix_ptr( XX, j, j, order ) = energy; - } - - ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */ - /* Calculate the remaining elements of the correlation matrix */ - if( rshifts_local > 0 ) { - /* Right shifting used */ - for( lag = 1; lag < order; lag++ ) { - /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ - energy = 0; - for( i = 0; i < L; i++ ) { - energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local ); - } - /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ - matrix_ptr( XX, lag, 0, order ) = energy; - matrix_ptr( XX, 0, lag, order ) = energy; - for( j = 1; j < ( order - lag ); j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) ); - matrix_ptr( XX, lag + j, j, order ) = energy; - matrix_ptr( XX, j, lag + j, order ) = energy; - } - ptr2--; /* Update pointer to first sample of next column (lag) in X */ - } - } else { - for( lag = 1; lag < order; lag++ ) { - /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ - energy = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); - matrix_ptr( XX, lag, 0, order ) = energy; - matrix_ptr( XX, 0, lag, order ) = energy; - /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ - for( j = 1; j < ( order - lag ); j++ ) { - energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) ); - energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] ); - matrix_ptr( XX, lag + j, j, order ) = energy; - matrix_ptr( XX, j, lag + j, order ) = energy; - } - ptr2--;/* Update pointer to first sample of next column (lag) in X */ - } - } - *rshifts = rshifts_local; -} - diff --git a/thirdparty/opus/silk/fixed/encode_frame_FIX.c b/thirdparty/opus/silk/fixed/encode_frame_FIX.c deleted file mode 100644 index 5ef44b03fc..0000000000 --- a/thirdparty/opus/silk/fixed/encode_frame_FIX.c +++ /dev/null @@ -1,387 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int32 xfw_Q3[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -); - -void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ -) -{ - /****************************/ - /* Voice Activity Detection */ - /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - - /**************************************************/ - /* Convert speech activity into VAD and DTX flags */ - /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { - psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; - psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.inDTX = 0; - } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; - psEnc->sCmn.inDTX = 0; - } - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; - } else { - psEnc->sCmn.noSpeechCounter = 0; - psEnc->sCmn.inDTX = 0; - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - } -} - -/****************/ -/* Encode frame */ -/****************/ -opus_int silk_encode_frame_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -) -{ - silk_encoder_control_FIX sEncCtrl; - opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; - opus_int16 *x_frame; - ec_enc sRangeEnc_copy, sRangeEnc_copy2; - silk_nsq_state sNSQ_copy, sNSQ_copy2; - opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; - opus_int32 gainsID, gainsID_lower, gainsID_upper; - opus_int16 gainMult_Q8; - opus_int16 ec_prevLagIndex_copy; - opus_int ec_prevSignalType_copy; - opus_int8 LastGainIndex_copy2; - SAVE_STACK; - - /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ - LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; - - psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; - - /**************************************************************/ - /* Set up Input Pointers, and insert frame in input buffer */ - /*************************************************************/ - /* start of frame to encode */ - x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; - - /***************************************/ - /* Ensure smooth bandwidth transitions */ - /***************************************/ - silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); - - /*******************************************/ - /* Copy new frame to front of input buffer */ - /*******************************************/ - silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); - - if( !psEnc->sCmn.prefillFlag ) { - VARDECL( opus_int32, xfw_Q3 ); - VARDECL( opus_int16, res_pitch ); - VARDECL( opus_uint8, ec_buf_copy ); - opus_int16 *res_pitch_frame; - - ALLOC( res_pitch, - psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length - + psEnc->sCmn.ltp_mem_length, opus_int16 ); - /* start of pitch LPC residual frame */ - res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; - - /*****************************************/ - /* Find pitch lags, initial LPC analysis */ - /*****************************************/ - silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); - - /************************/ - /* Noise shape analysis */ - /************************/ - silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch ); - - /***************************************************/ - /* Find linear prediction coefficients (LPC + LTP) */ - /***************************************************/ - silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); - - /****************************************/ - /* Process gains */ - /****************************************/ - silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding ); - - /*****************************************/ - /* Prefiltering for noise shaper */ - /*****************************************/ - ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 ); - silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame ); - - /****************************************/ - /* Low Bitrate Redundant Encoding */ - /****************************************/ - silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding ); - - /* Loop over quantizer and entropy coding to control bitrate */ - maxIter = 6; - gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); - found_lower = 0; - found_upper = 0; - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - gainsID_lower = -1; - gainsID_upper = -1; - /* Copy part of the input state */ - silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); - silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - seed_copy = psEnc->sCmn.indices.Seed; - ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; - ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; - ALLOC( ec_buf_copy, 1275, opus_uint8 ); - for( iter = 0; ; iter++ ) { - if( gainsID == gainsID_lower ) { - nBits = nBits_lower; - } else if( gainsID == gainsID_upper ) { - nBits = nBits_upper; - } else { - /* Restore part of the input state */ - if( iter > 0 ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); - psEnc->sCmn.indices.Seed = seed_copy; - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - } - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, - psEnc->sCmn.arch ); - } else { - silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, - psEnc->sCmn.arch); - } - - /****************************************/ - /* Encode Parameters */ - /****************************************/ - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - /****************************************/ - /* Encode Excitation Signal */ - /****************************************/ - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { - break; - } - } - - if( iter == maxIter ) { - if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { - /* Restore output state from earlier iteration that did meet the bitrate budget */ - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - silk_assert( sRangeEnc_copy2.offs <= 1275 ); - silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); - psEnc->sShape.LastGainIndex = LastGainIndex_copy2; - } - break; - } - - if( nBits > maxBits ) { - if( found_lower == 0 && iter >= 2 ) { - /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ - sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 ); - found_upper = 0; - gainsID_upper = -1; - } else { - found_upper = 1; - nBits_upper = nBits; - gainMult_upper = gainMult_Q8; - gainsID_upper = gainsID; - } - } else if( nBits < maxBits - 5 ) { - found_lower = 1; - nBits_lower = nBits; - gainMult_lower = gainMult_Q8; - if( gainsID != gainsID_lower ) { - gainsID_lower = gainsID; - /* Copy part of the output state */ - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - silk_assert( psRangeEnc->offs <= 1275 ); - silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); - silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; - } - } else { - /* Within 5 bits of budget: close enough */ - break; - } - - if( ( found_lower & found_upper ) == 0 ) { - /* Adjust gain according to high-rate rate/distortion curve */ - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); - if( nBits > maxBits ) { - gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); - } - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); - } else { - /* Adjust gain by interpolating */ - gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower ); - /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ - if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); - } else - if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); - } - - /* Quantize gains */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16, - &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Unique identifier of gains vector */ - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - } - } - - /* Update input buffer */ - silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], - ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) ); - - /* Exit without entropy coding */ - if( psEnc->sCmn.prefillFlag ) { - /* No payload */ - *pnBytesOut = 0; - RESTORE_STACK; - return ret; - } - - /* Parameters needed for next frame */ - psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; - - /****************************************/ - /* Finalize payload */ - /****************************************/ - psEnc->sCmn.first_frame_after_reset = 0; - /* Payload size */ - *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); - - RESTORE_STACK; - return ret; -} - -/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int32 xfw_Q3[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -) -{ - opus_int32 TempGains_Q16[ MAX_NB_SUBFR ]; - SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; - silk_nsq_state sNSQ_LBRR; - - /*******************************************/ - /* Control use of inband LBRR */ - /*******************************************/ - if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { - psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - - /* Copy noise shaping quantizer state and quantization indices from regular encoding */ - silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); - - /* Save original gains */ - silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - - if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { - /* First frame in packet or previous frame not LBRR coded */ - psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; - - /* Increase Gains to get target LBRR rate */ - psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases; - psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); - } - - /* Decode to get gains in sync with decoder */ - /* Overwrite unquantized gains with quantized gains */ - silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices, - &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, - psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); - } else { - silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, - psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); - } - - /* Restore original gains */ - silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - } -} diff --git a/thirdparty/opus/silk/fixed/find_LPC_FIX.c b/thirdparty/opus/silk/fixed/find_LPC_FIX.c deleted file mode 100644 index e11cdc86e6..0000000000 --- a/thirdparty/opus/silk/fixed/find_LPC_FIX.c +++ /dev/null @@ -1,151 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Finds LPC vector from correlations, and converts to NLSF */ -void silk_find_LPC_FIX( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const opus_int16 x[], /* I Input signal */ - const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ -) -{ - opus_int k, subfr_length; - opus_int32 a_Q16[ MAX_LPC_ORDER ]; - opus_int isInterpLower, shift; - opus_int32 res_nrg0, res_nrg1; - opus_int rshift0, rshift1; - - /* Used only for LSF interpolation */ - opus_int32 a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg; - opus_int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q; - opus_int16 a_tmp_Q12[ MAX_LPC_ORDER ]; - opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; - SAVE_STACK; - - subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; - - /* Default: no interpolation */ - psEncC->indices.NLSFInterpCoef_Q2 = 4; - - /* Burg AR analysis for the full frame */ - silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch ); - - if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { - VARDECL( opus_int16, LPC_res ); - - /* Optimal solution for last 10 ms */ - silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch ); - - /* subtract residual energy here, as that's easier than adding it to the */ - /* residual energy of the first 10 ms in each iteration of the search below */ - shift = res_tmp_nrg_Q - res_nrg_Q; - if( shift >= 0 ) { - if( shift < 32 ) { - res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift ); - } - } else { - silk_assert( shift > -32 ); - res_nrg = silk_RSHIFT( res_nrg, -shift ) - res_tmp_nrg; - res_nrg_Q = res_tmp_nrg_Q; - } - - /* Convert to NLSFs */ - silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder ); - - ALLOC( LPC_res, 2 * subfr_length, opus_int16 ); - - /* Search over interpolation indices to find the one with lowest residual energy */ - for( k = 3; k >= 0; k-- ) { - /* Interpolate NLSFs for first half */ - silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); - - /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); - - /* Calculate residual energy with NLSF interpolation */ - silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch ); - - silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ); - silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ); - - /* Add subframe energies from first half frame */ - shift = rshift0 - rshift1; - if( shift >= 0 ) { - res_nrg1 = silk_RSHIFT( res_nrg1, shift ); - res_nrg_interp_Q = -rshift0; - } else { - res_nrg0 = silk_RSHIFT( res_nrg0, -shift ); - res_nrg_interp_Q = -rshift1; - } - res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 ); - - /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */ - shift = res_nrg_interp_Q - res_nrg_Q; - if( shift >= 0 ) { - if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) { - isInterpLower = silk_TRUE; - } else { - isInterpLower = silk_FALSE; - } - } else { - if( -shift < 32 ) { - if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) { - isInterpLower = silk_TRUE; - } else { - isInterpLower = silk_FALSE; - } - } else { - isInterpLower = silk_FALSE; - } - } - - /* Determine whether current interpolated NLSFs are best so far */ - if( isInterpLower == silk_TRUE ) { - /* Interpolation has lower residual energy */ - res_nrg = res_nrg_interp; - res_nrg_Q = res_nrg_interp_Q; - psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; - } - } - } - - if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { - /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ - silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder ); - } - - silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/find_LTP_FIX.c b/thirdparty/opus/silk/fixed/find_LTP_FIX.c deleted file mode 100644 index 1314a28137..0000000000 --- a/thirdparty/opus/silk/fixed/find_LTP_FIX.c +++ /dev/null @@ -1,245 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "tuning_parameters.h" - -/* Head room for correlations */ -#define LTP_CORRS_HEAD_ROOM 2 - -void silk_fit_LTP( - opus_int32 LTP_coefs_Q16[ LTP_ORDER ], - opus_int16 LTP_coefs_Q14[ LTP_ORDER ] -); - -void silk_find_LTP_FIX( - opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ - const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ - const opus_int subfr_length, /* I subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset, /* I number of samples in LTP memory */ - opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, k, lshift; - const opus_int16 *r_ptr, *lag_ptr; - opus_int16 *b_Q14_ptr; - - opus_int32 regu; - opus_int32 *WLTP_ptr; - opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26; - opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits; - - opus_int32 temp32, denom32; - opus_int extra_shifts; - opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs; - opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; - opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; - opus_int32 wd, m_Q12; - - b_Q14_ptr = b_Q14; - WLTP_ptr = WLTP; - r_ptr = &r_lpc[ mem_offset ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - - silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */ - - /* Assure headroom */ - LZs = silk_CLZ32( rr[k] ); - if( LZs < LTP_CORRS_HEAD_ROOM ) { - rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs ); - rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); - } - corr_rshifts[ k ] = rr_shifts; - silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ - - /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ - silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ - if( corr_rshifts[ k ] > rr_shifts ) { - rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ - } - silk_assert( rr[ k ] >= 0 ); - - regu = 1; - regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); - regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); - regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); - silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER ); - - silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */ - - /* Limit and store in Q14 */ - silk_fit_LTP( b_Q16, b_Q14_ptr ); - - /* Calculate residual energy */ - nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */ - - /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ - extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM ); - denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ - silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ - denom32 = silk_max( denom32, 1 ); - silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */ - temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ - temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */ - - /* Limit temp such that the below scaling never wraps around */ - WLTP_max = 0; - for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max ); - } - lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */ - silk_assert( 26 - 18 + lshift >= 0 ); - if( 26 - 18 + lshift < 31 ) { - temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) ); - } - - silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ - - w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */ - silk_assert( w[k] >= 0 ); - - r_ptr += subfr_length; - b_Q14_ptr += LTP_ORDER; - WLTP_ptr += LTP_ORDER * LTP_ORDER; - } - - maxRshifts = 0; - for( k = 0; k < nb_subfr; k++ ) { - maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts ); - } - - /* Compute LTP coding gain */ - if( LTPredCodGain_Q7 != NULL ) { - LPC_LTP_res_nrg = 0; - LPC_res_nrg = 0; - silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */ - for( k = 0; k < nb_subfr; k++ ) { - LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ - LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ - } - LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */ - - div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 ); - *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ); - - silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) ); - } - - /* smoothing */ - /* d = sum( B, 1 ); */ - b_Q14_ptr = b_Q14; - for( k = 0; k < nb_subfr; k++ ) { - d_Q14[ k ] = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - d_Q14[ k ] += b_Q14_ptr[ i ]; - } - b_Q14_ptr += LTP_ORDER; - } - - /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ - - /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ - max_abs_d_Q14 = 0; - max_w_bits = 0; - for( k = 0; k < nb_subfr; k++ ) { - max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) ); - /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ - /* Find bits needed in Q( 18 - maxRshifts ) */ - max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts ); - } - - /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */ - silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) ); - - /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ - extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14; - - /* Subtract what we got available; bits in output var plus maxRshifts */ - extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */ - extra_shifts = silk_max_int( extra_shifts, 0 ); - - maxRshifts_wxtra = maxRshifts + extra_shifts; - - temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ - wd = 0; - for( k = 0; k < nb_subfr; k++ ) { - /* w has at least 2 bits of headroom so no overflow should happen */ - temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */ - wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */ - } - m_Q12 = silk_DIV32_varQ( wd, temp32, 12 ); - - b_Q14_ptr = b_Q14; - for( k = 0; k < nb_subfr; k++ ) { - /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ - if( 2 - corr_rshifts[k] > 0 ) { - temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] ); - } else { - temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 ); - } - - g_Q26 = silk_MUL( - silk_DIV32( - SILK_FIX_CONST( LTP_SMOOTHING, 26 ), - silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */ - silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */ - - temp32 = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */ - temp32 += delta_b_Q14[ i ]; /* Q14 */ - } - temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */ - for( i = 0; i < LTP_ORDER; i++ ) { - b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 ); - } - b_Q14_ptr += LTP_ORDER; - } -} - -void silk_fit_LTP( - opus_int32 LTP_coefs_Q16[ LTP_ORDER ], - opus_int16 LTP_coefs_Q14[ LTP_ORDER ] -) -{ - opus_int i; - - for( i = 0; i < LTP_ORDER; i++ ) { - LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) ); - } -} diff --git a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c b/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c deleted file mode 100644 index b8440a8247..0000000000 --- a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c +++ /dev/null @@ -1,145 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Find pitch lags */ -void silk_find_pitch_lags_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int16 res[], /* O residual */ - const opus_int16 x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int buf_len, i, scale; - opus_int32 thrhld_Q13, res_nrg; - const opus_int16 *x_buf, *x_buf_ptr; - VARDECL( opus_int16, Wsig ); - opus_int16 *Wsig_ptr; - opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; - opus_int16 rc_Q15[ MAX_FIND_PITCH_LPC_ORDER ]; - opus_int32 A_Q24[ MAX_FIND_PITCH_LPC_ORDER ]; - opus_int16 A_Q12[ MAX_FIND_PITCH_LPC_ORDER ]; - SAVE_STACK; - - /******************************************/ - /* Set up buffer lengths etc based on Fs */ - /******************************************/ - buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; - - /* Safety check */ - silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); - - x_buf = x - psEnc->sCmn.ltp_mem_length; - - /*************************************/ - /* Estimate LPC AR coefficients */ - /*************************************/ - - /* Calculate windowed signal */ - - ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 ); - - /* First LA_LTP samples */ - x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; - Wsig_ptr = Wsig; - silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); - - /* Middle un - windowed samples */ - Wsig_ptr += psEnc->sCmn.la_pitch; - x_buf_ptr += psEnc->sCmn.la_pitch; - silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); - - /* Last LA_LTP samples */ - Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); - - /* Calculate autocorrelation sequence */ - silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); - - /* Add white noise, as fraction of energy */ - auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1; - - /* Calculate the reflection coefficients using schur */ - res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Prediction gain */ - psEncCtrl->predGain_Q16 = silk_DIV32_varQ( auto_corr[ 0 ], silk_max_int( res_nrg, 1 ), 16 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a( A_Q24, rc_Q15, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Convert From 32 bit Q24 to 16 bit Q12 coefs */ - for( i = 0; i < psEnc->sCmn.pitchEstimationLPCOrder; i++ ) { - A_Q12[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( A_Q24[ i ], 12 ) ); - } - - /* Do BWE */ - silk_bwexpander( A_Q12, psEnc->sCmn.pitchEstimationLPCOrder, SILK_FIX_CONST( FIND_PITCH_BANDWIDTH_EXPANSION, 16 ) ); - - /*****************************************/ - /* LPC analysis filtering */ - /*****************************************/ - silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); - - if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { - /* Threshold for pitch estimator */ - thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 ); - thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder ); - thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 21 ), psEnc->sCmn.speech_activity_Q8 ); - thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15, 13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) ); - thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 14 ), psEnc->sCmn.input_tilt_Q15 ); - thrhld_Q13 = silk_SAT16( thrhld_Q13 ); - - /*****************************************/ - /* Call pitch estimator */ - /*****************************************/ - if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, - &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16, - (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, - psEnc->sCmn.arch) == 0 ) - { - psEnc->sCmn.indices.signalType = TYPE_VOICED; - } else { - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - } - } else { - silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); - psEnc->sCmn.indices.lagIndex = 0; - psEnc->sCmn.indices.contourIndex = 0; - psEnc->LTPCorr_Q15 = 0; - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c b/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c deleted file mode 100644 index d308e9cf5f..0000000000 --- a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c +++ /dev/null @@ -1,148 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" - -void silk_find_pred_coefs_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - const opus_int16 res_pitch[], /* I Residual from pitch analysis */ - const opus_int16 x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i; - opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ]; - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; - const opus_int16 *x_ptr; - opus_int16 *x_pre_ptr; - VARDECL( opus_int16, LPC_in_pre ); - opus_int32 tmp, min_gain_Q16, minInvGain_Q30; - opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ]; - SAVE_STACK; - - /* weighting for weighted least squares */ - min_gain_Q16 = silk_int32_MAX >> 6; - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - min_gain_Q16 = silk_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] ); - } - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - /* Divide to Q16 */ - silk_assert( psEncCtrl->Gains_Q16[ i ] > 0 ); - /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */ - invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 ); - - /* Ensure Wght_Q15 a minimum value 1 */ - invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 ); - - /* Square the inverted gains */ - silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) ); - tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] ); - Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 ); - - /* Invert the inverted and normalized gains */ - local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] ); - } - - ALLOC( LPC_in_pre, - psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder - + psEnc->sCmn.frame_length, opus_int16 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - VARDECL( opus_int32, WLTP ); - - /**********/ - /* VOICED */ - /**********/ - silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); - - ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); - - /* LTP analysis */ - silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, - res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, - psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch ); - - /* Quantize LTP gain parameters */ - silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, - psEnc->sCmn.arch); - - /* Control LTP scaling */ - silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); - - /* Create LTP residual */ - silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14, - psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); - - } else { - /************/ - /* UNVOICED */ - /************/ - /* Create signal with prepended subframes, scaled by inverse gains */ - x_ptr = x - psEnc->sCmn.predictLPCOrder; - x_pre_ptr = LPC_in_pre; - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ], - psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); - x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; - x_ptr += psEnc->sCmn.subfr_length; - } - - silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) ); - psEncCtrl->LTPredCodGain_Q7 = 0; - psEnc->sCmn.sum_log_gain_Q7 = 0; - } - - /* Limit on total predictive coding gain */ - if( psEnc->sCmn.first_frame_after_reset ) { - minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 ); - } else { - minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */ - minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, - silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), - silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 ); - } - - /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */ - silk_find_LPC_FIX( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain_Q30 ); - - /* Quantize LSFs */ - silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 ); - - /* Calculate residual energy using quantized LPC coefficients */ - silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains, - psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder, psEnc->sCmn.arch ); - - /* Copy to prediction struct for use in next frame for interpolation */ - silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/k2a_FIX.c b/thirdparty/opus/silk/fixed/k2a_FIX.c deleted file mode 100644 index 5fee599bcb..0000000000 --- a/thirdparty/opus/silk/fixed/k2a_FIX.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */ - const opus_int32 order /* I Prediction order */ -) -{ - opus_int k, n; - opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; - - for( k = 0; k < order; k++ ) { - for( n = 0; n < k; n++ ) { - Atmp[ n ] = A_Q24[ n ]; - } - for( n = 0; n < k; n++ ) { - A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] ); - } - A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 ); - } -} diff --git a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c b/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c deleted file mode 100644 index 3b03987544..0000000000 --- a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a_Q16( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */ - const opus_int32 order /* I Prediction order */ -) -{ - opus_int k, n; - opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; - - for( k = 0; k < order; k++ ) { - for( n = 0; n < k; n++ ) { - Atmp[ n ] = A_Q24[ n ]; - } - for( n = 0; n < k; n++ ) { - A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] ); - } - A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 ); - } -} diff --git a/thirdparty/opus/silk/fixed/main_FIX.h b/thirdparty/opus/silk/fixed/main_FIX.h deleted file mode 100644 index 375b5eb32e..0000000000 --- a/thirdparty/opus/silk/fixed/main_FIX.h +++ /dev/null @@ -1,272 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MAIN_FIX_H -#define SILK_MAIN_FIX_H - -#include "SigProc_FIX.h" -#include "structs_FIX.h" -#include "control.h" -#include "main.h" -#include "PLC.h" -#include "debug.h" -#include "entenc.h" - -#ifndef FORCE_CPP_BUILD -#ifdef __cplusplus -extern "C" -{ -#endif -#endif - -#define silk_encoder_state_Fxx silk_encoder_state_FIX -#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX -#define silk_encode_frame_Fxx silk_encode_frame_FIX - -/*********************/ -/* Encoder Functions */ -/*********************/ - -/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ -void silk_HP_variable_cutoff( - silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ -); - -/* Encoder main function */ -void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ -); - -/* Encoder main function */ -opus_int silk_encode_frame_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -); - -/* Initializes the Silk encoder state */ -opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ - int arch /* I Run-time architecture */ -); - -/* Control the Silk encoder */ -opus_int silk_control_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl, /* I Control structure */ - const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ - const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ - const opus_int channelNb, /* I Channel number */ - const opus_int force_fs_kHz -); - -/****************/ -/* Prefiltering */ -/****************/ -void silk_prefilter_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ - opus_int32 xw_Q10[], /* O Weighted signal */ - const opus_int16 x[] /* I Speech signal */ -); - -void silk_warped_LPC_analysis_filter_FIX_c( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -); - - -/**************************/ -/* Noise shaping analysis */ -/**************************/ -/* Compute noise shaping coefficients and initial gain values */ -void silk_noise_shape_analysis_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ - const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ - int arch /* I Run-time architecture */ -); - -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FIX( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -); - -/* Calculation of LTP state scaling */ -void silk_LTP_scale_ctrl_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/**********************************************/ -/* Prediction Analysis */ -/**********************************************/ -/* Find pitch lags */ -void silk_find_pitch_lags_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int16 res[], /* O residual */ - const opus_int16 x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -); - -/* Find LPC and LTP coefficients */ -void silk_find_pred_coefs_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - const opus_int16 res_pitch[], /* I Residual from pitch analysis */ - const opus_int16 x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* LPC analysis */ -void silk_find_LPC_FIX( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const opus_int16 x[], /* I Input signal */ - const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ -); - -/* LTP analysis */ -void silk_find_LTP_FIX( - opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ - const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ - const opus_int subfr_length, /* I subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset, /* I number of samples in LTP memory */ - opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ - int arch /* I Run-time architecture */ -); - -void silk_LTP_analysis_filter_FIX( - opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */ - const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */ - const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */ -); - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FIX( - opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */ - const opus_int16 x[], /* I Input signal */ - opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int LPC_order, /* I LPC order */ - int arch /* I Run-time architecture */ -); - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -opus_int32 silk_residual_energy16_covar_FIX( - const opus_int16 *c, /* I Prediction vector */ - const opus_int32 *wXX, /* I Correlation matrix */ - const opus_int32 *wXx, /* I Correlation vector */ - opus_int32 wxx, /* I Signal energy */ - opus_int D, /* I Dimension */ - opus_int cQ /* I Q value for c vector 0 - 15 */ -); - -/* Processing of gains */ -void silk_process_gains_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/******************/ -/* Linear Algebra */ -/******************/ -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - const opus_int head_room, /* I Desired headroom */ - opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int *rshifts, /* I/O Right shifts of correlations */ - int arch /* I Run-time architecture */ -); - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int16 *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ - const opus_int rshifts, /* I Right shifts of correlations */ - int arch /* I Run-time architecture */ -); - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FIX( - opus_int32 *XX, /* I/O Correlation matrices */ - opus_int32 *xx, /* I/O Correlation values */ - opus_int32 noise, /* I Noise to add */ - opus_int D /* I Dimension of XX */ -); - -/* Solves Ax = b, assuming A is symmetric */ -void silk_solve_LDL_FIX( - opus_int32 *A, /* I Pointer to symetric square matrix A */ - opus_int M, /* I Size of matrix */ - const opus_int32 *b, /* I Pointer to b vector */ - opus_int32 *x_Q16 /* O Pointer to x solution vector */ -); - -#ifndef FORCE_CPP_BUILD -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* FORCE_CPP_BUILD */ -#endif /* SILK_MAIN_FIX_H */ diff --git a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h deleted file mode 100644 index c30481e437..0000000000 --- a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h +++ /dev/null @@ -1,336 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - - -/**************************************************************/ -/* Compute noise shaping coefficients and initial gain values */ -/**************************************************************/ -#define OVERRIDE_silk_noise_shape_analysis_FIX - -void silk_noise_shape_analysis_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ - const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ - int arch /* I Run-time architecture */ -) -{ - silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; - opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; - VARDECL( opus_int16, x_windowed ); - const opus_int16 *x_ptr, *pitch_res_ptr; - SAVE_STACK; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; - - /* Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] - + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); - - /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ - psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); - - /* Reduce coding SNR during low speech activity */ - if( psEnc->sCmn.useCBR == 0 ) { - b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; - b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ - silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), - SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) */ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness_Q8 = 0; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); - energy_variation_Q7 = 0; - log_energy_prev_Q7 = 0; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); - nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ - - log_energy_Q7 = silk_lin2log( nrg ); - if( k > 0 ) { - energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); - } - log_energy_prev_Q7 = log_energy_Q7; - pitch_res_ptr += nSamples; - } - - psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - - SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), - silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); - delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), - SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); - BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); - BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); - } else { - warping_Q16 = 0; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); - - silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); - shift += flat_part; - silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), - SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); - - /* Calculate the reflection coefficients using schur */ - nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_assert( nrg >= 0 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); - - Qnrg = -scale; /* range: -12...30*/ - silk_assert( Qnrg >= -12 ); - silk_assert( Qnrg <= 30 ); - - /* Make sure that Qnrg is an even number */ - if( Qnrg & 1 ) { - Qnrg -= 1; - nrg >>= 1; - } - - tmp32 = silk_SQRT_APPROX( nrg ); - Qnrg >>= 1; /* range: -6...15*/ - - psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \ - silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) )); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - } - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); - silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); - - /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ - pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); - psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); - - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); - psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); - } - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity and put lower limit on gains */ - gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); - gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); - silk_assert( gain_mult_Q16 > 0 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); - } - - gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), - psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - /************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), - SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); - strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); - psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - } - silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ - Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - - silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), - silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); - } else { - b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - - silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); - psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; - } - Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), - psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); - - /* More harmonic boost for noisy input signals */ - HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, - SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), - SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), - psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), - silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); - } else { - HarmShapeGain_Q16 = 0; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < MAX_NB_SUBFR; k++ ) { - psShapeSt->HarmBoost_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->HarmShapeGain_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->Tilt_smth_Q16 = - silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - - psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); - psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); - psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h deleted file mode 100644 index 21b256885f..0000000000 --- a/thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h +++ /dev/null @@ -1,184 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef __PREFILTER_FIX_MIPSR1_H__ -#define __PREFILTER_FIX_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX -void silk_warped_LPC_analysis_filter_FIX( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order, /* I Filter order (even) */ - int arch -) -{ - opus_int n, i; - opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4; - opus_int32 state_cur, state_next; - - (void)arch; - - /* Order must be even */ - /* Length must be even */ - - silk_assert( ( order & 1 ) == 0 ); - silk_assert( ( length & 1 ) == 0 ); - - for( n = 0; n < length; n+=2 ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state_cur = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - - - /* Output of lowpass section */ - tmp4 = silk_SMLAWB( state_cur, state_next, lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ 1 ] = tmp4; - acc_Q22 = silk_RSHIFT( order, 1 ); - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] ); - - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state_cur = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - - - /* Output of allpass section */ - tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 ); - state[ i ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ i + 1 ] = tmp4; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] ); - } - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - - state[ order ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] ); - res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 ); - } -} - - - -/* Prefilter for finding Quantizer input signal */ -#define OVERRIDE_silk_prefilt_FIX -static inline void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ - opus_int Tilt_Q14, /* I Tilt shaping coeficient */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ - opus_int lag, /* I Lag for harmonic shaping */ - opus_int length /* I Length of signals */ -) -{ - opus_int i, idx, LTP_shp_buf_idx; - opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; - opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; - opus_int16 *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; - sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; - - if( lag > 0 ) { - for( i = 0; i < length; i++ ) { - /* unrolled loop */ - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); - } - } - else - { - for( i = 0; i < length; i++ ) { - - n_LTP_Q12 = 0; - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 ); - } - } - - /* Copy temp variable back to state */ - P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; - P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} - -#endif /* __PREFILTER_FIX_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h deleted file mode 100644 index e803ef0fce..0000000000 --- a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ /dev/null @@ -1,165 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ -#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -#undef QC -#define QC 10 - -#undef QS -#define QS 14 - -/* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX -void silk_warped_autocorrelation_FIX( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - opus_int n, i, lsh; - opus_int32 tmp1_QS=0, tmp2_QS=0, tmp3_QS=0, tmp4_QS=0, tmp5_QS=0, tmp6_QS=0, tmp7_QS=0, tmp8_QS=0, start_1=0, start_2=0, start_3=0; - opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int64 temp64; - - opus_int32 val; - val = 2 * QS - QC; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - silk_assert( 2 * QS - QC >= 0 ); - - /* Loop over samples */ - for( n = 0; n < length; n=n+4 ) { - - tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - start_1 = tmp1_QS; - tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS ); - start_2 = tmp3_QS; - tmp5_QS = silk_LSHIFT32( (opus_int32)input[ n+2], QS ); - start_3 = tmp5_QS; - tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS ); - - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1); - - tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2); - - tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3); - - tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 ); - state_QS[ i ] = tmp7_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]); - - /* Output of allpass section */ - tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1); - - tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2); - - tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3); - - tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 ); - state_QS[ i + 1 ] = tmp8_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); - - } - state_QS[ order ] = tmp7_QS; - - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); - } - - for(;n< length; n++ ) { - - tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - - /* Output of allpass section */ - tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - state_QS[ i ] = tmp1_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); - - /* Output of allpass section */ - tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - state_QS[ i + 1 ] = tmp2_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); - } - state_QS[ order ] = tmp1_QS; - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); - } - - temp64 = corr_QC[ 0 ]; - temp64 = __builtin_mips_shilo(temp64, val); - - lsh = silk_CLZ64( temp64 ) - 35; - lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); - *scale = -( QC + lsh ); - silk_assert( *scale >= -30 && *scale <= 12 ); - if( lsh >= 0 ) { - for( i = 0; i < order + 1; i++ ) { - temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); - temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); - } - } else { - for( i = 0; i < order + 1; i++ ) { - temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); - temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); - } - } - - corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val); - - silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ -} -#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c b/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c deleted file mode 100644 index 22a89f75ae..0000000000 --- a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c +++ /dev/null @@ -1,451 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ -/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ -/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ -/* coefficient in an array of coefficients, for monic filters. */ -static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/ - const opus_int32 *coefs_Q24, - opus_int lambda_Q16, - opus_int order -) { - opus_int i; - opus_int32 gain_Q24; - - lambda_Q16 = -lambda_Q16; - gain_Q24 = coefs_Q24[ order - 1 ]; - for( i = order - 2; i >= 0; i-- ) { - gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 ); - } - gain_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 ); - return silk_INVERSE32_varQ( gain_Q24, 40 ); -} - -/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ -/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ -static OPUS_INLINE void limit_warped_coefs( - opus_int32 *coefs_syn_Q24, - opus_int32 *coefs_ana_Q24, - opus_int lambda_Q16, - opus_int32 limit_Q24, - opus_int order -) { - opus_int i, iter, ind = 0; - opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16; - opus_int32 nom_Q16, den_Q24; - - /* Convert to monic coefficients */ - lambda_Q16 = -lambda_Q16; - for( i = order - 1; i > 0; i-- ) { - coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); - coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); - } - lambda_Q16 = -lambda_Q16; - nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); - gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); - gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - for( i = 0; i < order; i++ ) { - coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); - coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); - } - - for( iter = 0; iter < 10; iter++ ) { - /* Find maximum absolute value */ - maxabs_Q24 = -1; - for( i = 0; i < order; i++ ) { - tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) ); - if( tmp > maxabs_Q24 ) { - maxabs_Q24 = tmp; - ind = i; - } - } - if( maxabs_Q24 <= limit_Q24 ) { - /* Coefficients are within range - done */ - return; - } - - /* Convert back to true warped coefficients */ - for( i = 1; i < order; i++ ) { - coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); - coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); - } - gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 ); - gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 ); - for( i = 0; i < order; i++ ) { - coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); - coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); - } - - /* Apply bandwidth expansion */ - chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ( - silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), - silk_MUL( maxabs_Q24, ind + 1 ), 22 ); - silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 ); - silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 ); - - /* Convert to monic warped coefficients */ - lambda_Q16 = -lambda_Q16; - for( i = order - 1; i > 0; i-- ) { - coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); - coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); - } - lambda_Q16 = -lambda_Q16; - nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); - gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); - gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - for( i = 0; i < order; i++ ) { - coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); - coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); - } - } - silk_assert( 0 ); -} - -#if defined(MIPSr1_ASM) -#include "mips/noise_shape_analysis_FIX_mipsr1.h" -#endif - -/**************************************************************/ -/* Compute noise shaping coefficients and initial gain values */ -/**************************************************************/ -#ifndef OVERRIDE_silk_noise_shape_analysis_FIX -void silk_noise_shape_analysis_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ - const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ - int arch /* I Run-time architecture */ -) -{ - silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; - opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; - VARDECL( opus_int16, x_windowed ); - const opus_int16 *x_ptr, *pitch_res_ptr; - SAVE_STACK; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; - - /* Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] - + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); - - /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ - psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); - - /* Reduce coding SNR during low speech activity */ - if( psEnc->sCmn.useCBR == 0 ) { - b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; - b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ - silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), - SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) */ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness_Q8 = 0; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); - energy_variation_Q7 = 0; - log_energy_prev_Q7 = 0; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); - nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ - - log_energy_Q7 = silk_lin2log( nrg ); - if( k > 0 ) { - energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); - } - log_energy_prev_Q7 = log_energy_Q7; - pitch_res_ptr += nSamples; - } - - psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - - SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), - silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); - delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), - SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); - BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); - BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); - } else { - warping_Q16 = 0; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); - - silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); - shift += flat_part; - silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), - SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); - - /* Calculate the reflection coefficients using schur */ - nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_assert( nrg >= 0 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); - - Qnrg = -scale; /* range: -12...30*/ - silk_assert( Qnrg >= -12 ); - silk_assert( Qnrg <= 30 ); - - /* Make sure that Qnrg is an even number */ - if( Qnrg & 1 ) { - Qnrg -= 1; - nrg >>= 1; - } - - tmp32 = silk_SQRT_APPROX( nrg ); - Qnrg >>= 1; /* range: -6...15*/ - - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( tmp32, 16 - Qnrg ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - } - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); - silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); - - /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ - pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); - psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); - - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); - psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); - } - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity and put lower limit on gains */ - gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); - gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); - silk_assert( gain_mult_Q16 > 0 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); - } - - gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), - psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - /************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), - SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); - strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); - psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - } - silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ - Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - - silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), - silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); - } else { - b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - - silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); - psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; - } - Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), - psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); - - /* More harmonic boost for noisy input signals */ - HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, - SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), - SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), - psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), - silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); - } else { - HarmShapeGain_Q16 = 0; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < MAX_NB_SUBFR; k++ ) { - psShapeSt->HarmBoost_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->HarmShapeGain_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->Tilt_smth_Q16 = - silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - - psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); - psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); - psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); - } - RESTORE_STACK; -} -#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */ diff --git a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c b/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c deleted file mode 100644 index 01bb9fc0a8..0000000000 --- a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c +++ /dev/null @@ -1,746 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/*********************************************************** -* Pitch analyser function -********************************************************** */ -#include "SigProc_FIX.h" -#include "pitch_est_defines.h" -#include "stack_alloc.h" -#include "debug.h" -#include "pitch.h" - -#define SCRATCH_SIZE 22 -#define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) -#define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 ) -#define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 ) -#define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 ) -#define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 ) -#define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 ) -#define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ ) -#define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) ) -#define D_COMP_MIN ( MIN_LAG_8KHZ - 3 ) -#define D_COMP_MAX ( MAX_LAG_8KHZ + 4 ) -#define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN ) - -typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ]; - -/************************************************************/ -/* Internally used functions */ -/************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ - const opus_int16 frame[], /* I vector to correlate */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of a 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -); - -static void silk_P_Ana_calc_energy_st3( - silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ - const opus_int16 frame[], /* I vector to calc energy in */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of one 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -); - -/*************************************************************/ -/* FIXED POINT CORE PITCH ANALYSIS FUNCTION */ -/*************************************************************/ -opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O 4 pitch lag values */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ - const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I Sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I number of 5 ms subframes */ - int arch /* I Run-time architecture */ -) -{ - VARDECL( opus_int16, frame_8kHz ); - VARDECL( opus_int16, frame_4kHz ); - opus_int32 filt_state[ 6 ]; - const opus_int16 *input_frame_ptr; - opus_int i, k, d, j; - VARDECL( opus_int16, C ); - VARDECL( opus_int32, xcorr32 ); - const opus_int16 *target_ptr, *basis_ptr; - opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target; - opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; - VARDECL( opus_int16, d_comp ); - opus_int32 sum, threshold, lag_counter; - opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new; - opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new; - VARDECL( silk_pe_stage3_vals, energies_st3 ); - VARDECL( silk_pe_stage3_vals, cross_corr_st3 ); - opus_int frame_length, frame_length_8kHz, frame_length_4kHz; - opus_int sf_length; - opus_int min_lag; - opus_int max_lag; - opus_int32 contour_bias_Q15, diff; - opus_int nb_cbk_search, cbk_size; - opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13; - const opus_int8 *Lag_CB_ptr; - SAVE_STACK; - /* Check for valid sampling frequency */ - silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); - - /* Check for valid complexity setting */ - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) ); - silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) ); - - /* Set up frame lengths max / min lag for the sampling frequency */ - frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; - frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; - frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; - sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; - min_lag = PE_MIN_LAG_MS * Fs_kHz; - max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; - - /* Resample from input sampled at Fs_kHz to 8 kHz */ - ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 ); - if( Fs_kHz == 16 ) { - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length ); - } else if( Fs_kHz == 12 ) { - silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); - silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length ); - } else { - silk_assert( Fs_kHz == 8 ); - silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) ); - } - - /* Decimate again to 4 kHz */ - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */ - ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 ); - silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz ); - - /* Low-pass filter */ - for( i = frame_length_4kHz - 1; i > 0; i-- ) { - frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); - } - - /******************************************************************************* - ** Scale 4 kHz signal down to prevent correlations measures from overflowing - ** find scaling as max scaling for each 8kHz(?) subframe - *******************************************************************************/ - - /* Inner product is calculated with different lengths, so scale for the worst case */ - silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz ); - if( shift > 0 ) { - shift = silk_RSHIFT( shift, 1 ); - for( i = 0; i < frame_length_4kHz; i++ ) { - frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift ); - } - } - - /****************************************************************************** - * FIRST STAGE, operating in 4 khz - ******************************************************************************/ - ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 ); - ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 ); - silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) ); - target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; - for( k = 0; k < nb_subfr >> 1; k++ ) { - /* Check that we are within range of the array */ - silk_assert( target_ptr >= frame_4kHz ); - silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - - basis_ptr = target_ptr - MIN_LAG_4KHZ; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - - celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); - - /* Calculate first vector products before loop */ - cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; - normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ); - normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) ); - normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); - - matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) = - (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ - - /* From now on normalizer is computed recursively */ - for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) { - basis_ptr--; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - - cross_corr = xcorr32[ MAX_LAG_4KHZ - d ]; - - /* Add contribution of new sample and remove contribution from oldest sample */ - normalizer = silk_ADD32( normalizer, - silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) - - silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) ); - - matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) = - (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ - } - /* Update target pointer */ - target_ptr += SF_LENGTH_8KHZ; - } - - /* Combine two subframes into single correlation measure and apply short-lag bias */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { - sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ) - + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */ - sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ - C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ - } - } else { - /* Only short-lag bias */ - for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { - sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */ - sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ - C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ - } - } - - /* Sort */ - length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 ); - silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); - silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ, - length_d_srch ); - - /* Escape if correlation is very low already here */ - Cmax = (opus_int)C[ 0 ]; /* Q14 */ - if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) { - silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); - *LTPCorr_Q15 = 0; - *lagIndex = 0; - *contourIndex = 0; - RESTORE_STACK; - return 1; - } - - threshold = silk_SMULWB( search_thres1_Q16, Cmax ); - for( i = 0; i < length_d_srch; i++ ) { - /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ - if( C[ i ] > threshold ) { - d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 ); - } else { - length_d_srch = i; - break; - } - } - silk_assert( length_d_srch > 0 ); - - ALLOC( d_comp, D_COMP_STRIDE, opus_int16 ); - for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) { - d_comp[ i - D_COMP_MIN ] = 0; - } - for( i = 0; i < length_d_srch; i++ ) { - d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1; - } - - /* Convolution */ - for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { - d_comp[ i - D_COMP_MIN ] += - d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ]; - } - - length_d_srch = 0; - for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) { - if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) { - d_srch[ length_d_srch ] = i; - length_d_srch++; - } - } - - /* Convolution */ - for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { - d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ] - + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ]; - } - - length_d_comp = 0; - for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) { - if( d_comp[ i - D_COMP_MIN ] > 0 ) { - d_comp[ length_d_comp ] = i - 2; - length_d_comp++; - } - } - - /********************************************************************************** - ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation - *************************************************************************************/ - - /****************************************************************************** - ** Scale signal down to avoid correlations measures from overflowing - *******************************************************************************/ - /* find scaling as max scaling for each subframe */ - silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz ); - if( shift > 0 ) { - shift = silk_RSHIFT( shift, 1 ); - for( i = 0; i < frame_length_8kHz; i++ ) { - frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift ); - } - } - - /********************************************************************************* - * Find energy of each subframe projected onto its history, for a range of delays - *********************************************************************************/ - silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) ); - - target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; - for( k = 0; k < nb_subfr; k++ ) { - - /* Check that we are within range of the array */ - silk_assert( target_ptr >= frame_8kHz ); - silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); - - energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 ); - for( j = 0; j < length_d_comp; j++ ) { - d = d_comp[ j ]; - basis_ptr = target_ptr - d; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_8kHz ); - silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); - - cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); - if( cross_corr > 0 ) { - energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); - matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = - (opus_int16)silk_DIV32_varQ( cross_corr, - silk_ADD32( energy_target, - energy_basis ), - 13 + 1 ); /* Q13 */ - } else { - matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0; - } - } - target_ptr += SF_LENGTH_8KHZ; - } - - /* search over lag range and lags codebook */ - /* scale factor for lag codebook, as a function of center lag */ - - CCmax = silk_int32_MIN; - CCmax_b = silk_int32_MIN; - - CBimax = 0; /* To avoid returning undefined lag values */ - lag = -1; /* To check if lag with strong enough correlation has been found */ - - if( prevLag > 0 ) { - if( Fs_kHz == 12 ) { - prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 ); - } else if( Fs_kHz == 16 ) { - prevLag = silk_RSHIFT( prevLag, 1 ); - } - prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag ); - } else { - prevLag_log2_Q7 = 0; - } - silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) ); - /* Set up stage 2 codebook based on number of subframes */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - cbk_size = PE_NB_CBKS_STAGE2_EXT; - Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; - if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { - /* If input is 8 khz use a larger codebook here because it is last stage */ - nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE2; - } - } else { - cbk_size = PE_NB_CBKS_STAGE2_10MS; - Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; - } - - for( k = 0; k < length_d_srch; k++ ) { - d = d_srch[ k ]; - for( j = 0; j < nb_cbk_search; j++ ) { - CC[ j ] = 0; - for( i = 0; i < nb_subfr; i++ ) { - opus_int d_subfr; - /* Try all codebooks */ - d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ); - CC[ j ] = CC[ j ] - + (opus_int32)matrix_ptr( C, i, - d_subfr - ( MIN_LAG_8KHZ - 2 ), - CSTRIDE_8KHZ ); - } - } - /* Find best codebook */ - CCmax_new = silk_int32_MIN; - CBimax_new = 0; - for( i = 0; i < nb_cbk_search; i++ ) { - if( CC[ i ] > CCmax_new ) { - CCmax_new = CC[ i ]; - CBimax_new = i; - } - } - - /* Bias towards shorter lags */ - lag_log2_Q7 = silk_lin2log( d ); /* Q7 */ - silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) ); - silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) ); - CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */ - - /* Bias towards previous lag */ - silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) ); - if( prevLag > 0 ) { - delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7; - silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) ); - delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 ); - prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */ - prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) ); - CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */ - } - - if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ - CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */ - silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */ - ) { - CCmax_b = CCmax_new_b; - CCmax = CCmax_new; - lag = d; - CBimax = CBimax_new; - } - } - - if( lag == -1 ) { - /* No suitable candidate found */ - silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); - *LTPCorr_Q15 = 0; - *lagIndex = 0; - *contourIndex = 0; - RESTORE_STACK; - return 1; - } - - /* Output normalized correlation */ - *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 ); - silk_assert( *LTPCorr_Q15 >= 0 ); - - if( Fs_kHz > 8 ) { - VARDECL( opus_int16, scratch_mem ); - /***************************************************************************/ - /* Scale input signal down to avoid correlations measures from overflowing */ - /***************************************************************************/ - /* find scaling as max scaling for each subframe */ - silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); - ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 ); - if( shift > 0 ) { - /* Move signal to scratch mem because the input signal should be unchanged */ - shift = silk_RSHIFT( shift, 1 ); - for( i = 0; i < frame_length; i++ ) { - scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); - } - input_frame_ptr = scratch_mem; - } else { - input_frame_ptr = frame; - } - - /* Search in original signal */ - - CBimax_old = CBimax; - /* Compensate for decimation */ - silk_assert( lag == silk_SAT16( lag ) ); - if( Fs_kHz == 12 ) { - lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 ); - } else if( Fs_kHz == 16 ) { - lag = silk_LSHIFT( lag, 1 ); - } else { - lag = silk_SMULBB( lag, 3 ); - } - - lag = silk_LIMIT_int( lag, min_lag, max_lag ); - start_lag = silk_max_int( lag - 2, min_lag ); - end_lag = silk_min_int( lag + 2, max_lag ); - lag_new = lag; /* to avoid undefined lag */ - CBimax = 0; /* to avoid undefined lag */ - - CCmax = silk_int32_MIN; - /* pitch lags according to second stage */ - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ]; - } - - /* Set up codebook parameters according to complexity setting and frame length */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - } - - /* Calculate the correlations and energies needed in stage 3 */ - ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); - - lag_counter = 0; - silk_assert( lag == silk_SAT16( lag ) ); - contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); - - target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; - energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 ); - for( d = start_lag; d <= end_lag; d++ ) { - for( j = 0; j < nb_cbk_search; j++ ) { - cross_corr = 0; - energy = energy_target; - for( k = 0; k < nb_subfr; k++ ) { - cross_corr = silk_ADD32( cross_corr, - matrix_ptr( cross_corr_st3, k, j, - nb_cbk_search )[ lag_counter ] ); - energy = silk_ADD32( energy, - matrix_ptr( energies_st3, k, j, - nb_cbk_search )[ lag_counter ] ); - silk_assert( energy >= 0 ); - } - if( cross_corr > 0 ) { - CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */ - /* Reduce depending on flatness of contour */ - diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */ - silk_assert( diff == silk_SAT16( diff ) ); - CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */ - } else { - CCmax_new = 0; - } - - if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { - CCmax = CCmax_new; - lag_new = d; - CBimax = j; - } - } - lag_counter++; - } - - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); - } - *lagIndex = (opus_int16)( lag_new - min_lag); - *contourIndex = (opus_int8)CBimax; - } else { /* Fs_kHz == 8 */ - /* Save Lags */ - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 ); - } - *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ ); - *contourIndex = (opus_int8)CBimax; - } - silk_assert( *lagIndex >= 0 ); - /* return as voiced */ - RESTORE_STACK; - return 0; -} - -/*********************************************************************** - * Calculates the correlations used in stage 3 search. In order to cover - * the whole lag codebook for all the searched offset lags (lag +- 2), - * the following correlations are needed in each sub frame: - * - * sf1: lag range [-8,...,7] total 16 correlations - * sf2: lag range [-4,...,4] total 9 correlations - * sf3: lag range [-3,....4] total 8 correltions - * sf4: lag range [-6,....8] total 15 correlations - * - * In total 48 correlations. The direct implementation computed in worst - * case 4*12*5 = 240 correlations, but more likely around 120. - ***********************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ - const opus_int16 frame[], /* I vector to correlate */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of a 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -) -{ - const opus_int16 *target_ptr; - opus_int i, j, k, lag_counter, lag_low, lag_high; - opus_int nb_cbk_search, delta, idx, cbk_size; - VARDECL( opus_int32, scratch_mem ); - VARDECL( opus_int32, xcorr32 ); - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - SAVE_STACK; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); - ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 ); - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the correlations for each subframe */ - lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); - silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); - for( j = lag_low; j <= lag_high; j++ ) { - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] = - scratch_mem[ idx + j ]; - } - } - target_ptr += sf_length; - } - RESTORE_STACK; -} - -/********************************************************************/ -/* Calculate the energies for first two subframes. The energies are */ -/* calculated recursively. */ -/********************************************************************/ -static void silk_P_Ana_calc_energy_st3( - silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ - const opus_int16 frame[], /* I vector to calc energy in */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of one 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -) -{ - const opus_int16 *target_ptr, *basis_ptr; - opus_int32 energy; - opus_int k, i, j, lag_counter; - opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; - VARDECL( opus_int32, scratch_mem ); - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - SAVE_STACK; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the energy for first lag */ - basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); - energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch ); - silk_assert( energy >= 0 ); - scratch_mem[ lag_counter ] = energy; - lag_counter++; - - lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); - for( i = 1; i < lag_diff; i++ ) { - /* remove part outside new window */ - energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] ); - silk_assert( energy >= 0 ); - - /* add part that comes into window */ - energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) ); - silk_assert( energy >= 0 ); - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = energy; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] = - scratch_mem[ idx + j ]; - silk_assert( - matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 ); - } - } - target_ptr += sf_length; - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/prefilter_FIX.c b/thirdparty/opus/silk/fixed/prefilter_FIX.c deleted file mode 100644 index 6a8e35152e..0000000000 --- a/thirdparty/opus/silk/fixed/prefilter_FIX.c +++ /dev/null @@ -1,221 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -#if defined(MIPSr1_ASM) -#include "mips/prefilter_FIX_mipsr1.h" -#endif - - -#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX) -#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ - ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) -#endif - -/* Prefilter for finding Quantizer input signal */ -static OPUS_INLINE void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ - opus_int Tilt_Q14, /* I Tilt shaping coeficient */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ - opus_int lag, /* I Lag for harmonic shaping */ - opus_int length /* I Length of signals */ -); - -void silk_warped_LPC_analysis_filter_FIX_c( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - opus_int32 acc_Q11, tmp1, tmp2; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state[ 1 ] = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state[ i ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state[ i + 1 ] = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - } - state[ order ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - } -} - -void silk_prefilter_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ - opus_int32 xw_Q3[], /* O Weighted signal */ - const opus_int16 x[] /* I Speech signal */ -) -{ - silk_prefilter_state_FIX *P = &psEnc->sPrefilt; - opus_int j, k, lag; - opus_int32 tmp_32; - const opus_int16 *AR1_shp_Q13; - const opus_int16 *px; - opus_int32 *pxw_Q3; - opus_int HarmShapeGain_Q12, Tilt_Q14; - opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14; - VARDECL( opus_int32, x_filt_Q12 ); - VARDECL( opus_int32, st_res_Q2 ); - opus_int16 B_Q10[ 2 ]; - SAVE_STACK; - - /* Set up pointers */ - px = x; - pxw_Q3 = xw_Q3; - lag = P->lagPrev; - ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 ); - ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Update Variables that change per sub frame */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - lag = psEncCtrl->pitchL[ k ]; - } - - /* Noise shape parameters */ - HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] ); - silk_assert( HarmShapeGain_Q12 >= 0 ); - HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 ); - HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 ); - Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ]; - LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ]; - AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Short term FIR filtering*/ - silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px, - psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch ); - - /* Reduce (mainly) low frequencies during harmonic emphasis */ - B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 ); - tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */ - tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */ - tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */ - tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */ - B_Q10[ 1 ]= silk_SAT16( tmp_32 ); - x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] ); - for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { - x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] ); - } - P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ]; - - silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length ); - - px += psEnc->sCmn.subfr_length; - pxw_Q3 += psEnc->sCmn.subfr_length; - } - - P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - RESTORE_STACK; -} - -#ifndef OVERRIDE_silk_prefilt_FIX -/* Prefilter for finding Quantizer input signal */ -static OPUS_INLINE void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ - opus_int Tilt_Q14, /* I Tilt shaping coeficient */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ - opus_int lag, /* I Lag for harmonic shaping */ - opus_int length /* I Length of signals */ -) -{ - opus_int i, idx, LTP_shp_buf_idx; - opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; - opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; - opus_int16 *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; - sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; - - for( i = 0; i < length; i++ ) { - if( lag > 0 ) { - /* unrolled loop */ - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - } else { - n_LTP_Q12 = 0; - } - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); - } - - /* Copy temp variable back to state */ - P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; - P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} -#endif /* OVERRIDE_silk_prefilt_FIX */ diff --git a/thirdparty/opus/silk/fixed/process_gains_FIX.c b/thirdparty/opus/silk/fixed/process_gains_FIX.c deleted file mode 100644 index 05aba31788..0000000000 --- a/thirdparty/opus/silk/fixed/process_gains_FIX.c +++ /dev/null @@ -1,117 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "tuning_parameters.h" - -/* Processing of gains */ -void silk_process_gains_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k; - opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10; - - /* Gain reduction when LTP coding gain is high */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */ - s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 ); - } - } - - /* Limit the quantized signal */ - /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */ - InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin( - silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length ); - - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Soft limit on ratio residual energy and squared gains */ - ResNrg = psEncCtrl->ResNrg[ k ]; - ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 ); - if( psEncCtrl->ResNrgQ[ k ] > 0 ) { - ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] ); - } else { - if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) { - ResNrgPart = silk_int32_MAX; - } else { - ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] ); - } - } - gain = psEncCtrl->Gains_Q16[ k ]; - gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) ); - if( gain_squared < silk_int16_MAX ) { - /* recalculate with higher precision */ - gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain ); - silk_assert( gain_squared > 0 ); - gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */ - gain = silk_min( gain, silk_int32_MAX >> 8 ); - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */ - } else { - gain = silk_SQRT_APPROX( gain_squared ); /* Q0 */ - gain = silk_min( gain, silk_int32_MAX >> 16 ); - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 16 ); /* Q16 */ - } - } - - /* Save unquantized gains and gain Index */ - silk_memcpy( psEncCtrl->GainsUnq_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex; - - /* Quantize gains */ - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16, - &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - if( psEncCtrl->LTPredCodGain_Q7 + silk_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - } - - /* Quantizer boundary adjustment */ - quant_offset_Q10 = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ]; - psEncCtrl->Lambda_Q10 = SILK_FIX_CONST( LAMBDA_OFFSET, 10 ) - + silk_SMULBB( SILK_FIX_CONST( LAMBDA_DELAYED_DECISIONS, 10 ), psEnc->sCmn.nStatesDelayedDecision ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_SPEECH_ACT, 18 ), psEnc->sCmn.speech_activity_Q8 ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_INPUT_QUALITY, 12 ), psEncCtrl->input_quality_Q14 ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_CODING_QUALITY, 12 ), psEncCtrl->coding_quality_Q14 ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_QUANT_OFFSET, 16 ), quant_offset_Q10 ); - - silk_assert( psEncCtrl->Lambda_Q10 > 0 ); - silk_assert( psEncCtrl->Lambda_Q10 < SILK_FIX_CONST( 2, 10 ) ); -} diff --git a/thirdparty/opus/silk/fixed/regularize_correlations_FIX.c b/thirdparty/opus/silk/fixed/regularize_correlations_FIX.c deleted file mode 100644 index a2836b05f4..0000000000 --- a/thirdparty/opus/silk/fixed/regularize_correlations_FIX.c +++ /dev/null @@ -1,47 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FIX( - opus_int32 *XX, /* I/O Correlation matrices */ - opus_int32 *xx, /* I/O Correlation values */ - opus_int32 noise, /* I Noise to add */ - opus_int D /* I Dimension of XX */ -) -{ - opus_int i; - for( i = 0; i < D; i++ ) { - matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise ); - } - xx[ 0 ] += noise; -} diff --git a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c b/thirdparty/opus/silk/fixed/residual_energy16_FIX.c deleted file mode 100644 index ebffb2a66f..0000000000 --- a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -opus_int32 silk_residual_energy16_covar_FIX( - const opus_int16 *c, /* I Prediction vector */ - const opus_int32 *wXX, /* I Correlation matrix */ - const opus_int32 *wXx, /* I Correlation vector */ - opus_int32 wxx, /* I Signal energy */ - opus_int D, /* I Dimension */ - opus_int cQ /* I Q value for c vector 0 - 15 */ -) -{ - opus_int i, j, lshifts, Qxtra; - opus_int32 c_max, w_max, tmp, tmp2, nrg; - opus_int cn[ MAX_MATRIX_SIZE ]; - const opus_int32 *pRow; - - /* Safety checks */ - silk_assert( D >= 0 ); - silk_assert( D <= 16 ); - silk_assert( cQ > 0 ); - silk_assert( cQ < 16 ); - - lshifts = 16 - cQ; - Qxtra = lshifts; - - c_max = 0; - for( i = 0; i < D; i++ ) { - c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) ); - } - Qxtra = silk_min_int( Qxtra, silk_CLZ32( c_max ) - 17 ); - - w_max = silk_max_32( wXX[ 0 ], wXX[ D * D - 1 ] ); - Qxtra = silk_min_int( Qxtra, silk_CLZ32( silk_MUL( D, silk_RSHIFT( silk_SMULWB( w_max, c_max ), 4 ) ) ) - 5 ); - Qxtra = silk_max_int( Qxtra, 0 ); - for( i = 0; i < D; i++ ) { - cn[ i ] = silk_LSHIFT( ( opus_int )c[ i ], Qxtra ); - silk_assert( silk_abs(cn[i]) <= ( silk_int16_MAX + 1 ) ); /* Check that silk_SMLAWB can be used */ - } - lshifts -= Qxtra; - - /* Compute wxx - 2 * wXx * c */ - tmp = 0; - for( i = 0; i < D; i++ ) { - tmp = silk_SMLAWB( tmp, wXx[ i ], cn[ i ] ); - } - nrg = silk_RSHIFT( wxx, 1 + lshifts ) - tmp; /* Q: -lshifts - 1 */ - - /* Add c' * wXX * c, assuming wXX is symmetric */ - tmp2 = 0; - for( i = 0; i < D; i++ ) { - tmp = 0; - pRow = &wXX[ i * D ]; - for( j = i + 1; j < D; j++ ) { - tmp = silk_SMLAWB( tmp, pRow[ j ], cn[ j ] ); - } - tmp = silk_SMLAWB( tmp, silk_RSHIFT( pRow[ i ], 1 ), cn[ i ] ); - tmp2 = silk_SMLAWB( tmp2, tmp, cn[ i ] ); - } - nrg = silk_ADD_LSHIFT32( nrg, tmp2, lshifts ); /* Q: -lshifts - 1 */ - - /* Keep one bit free always, because we add them for LSF interpolation */ - if( nrg < 1 ) { - nrg = 1; - } else if( nrg > silk_RSHIFT( silk_int32_MAX, lshifts + 2 ) ) { - nrg = silk_int32_MAX >> 1; - } else { - nrg = silk_LSHIFT( nrg, lshifts + 1 ); /* Q0 */ - } - return nrg; - -} diff --git a/thirdparty/opus/silk/fixed/residual_energy_FIX.c b/thirdparty/opus/silk/fixed/residual_energy_FIX.c deleted file mode 100644 index 41f74778e8..0000000000 --- a/thirdparty/opus/silk/fixed/residual_energy_FIX.c +++ /dev/null @@ -1,98 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FIX( - opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */ - const opus_int16 x[], /* I Input signal */ - opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int LPC_order, /* I LPC order */ - int arch /* I Run-time architecture */ -) -{ - opus_int offset, i, j, rshift, lz1, lz2; - opus_int16 *LPC_res_ptr; - VARDECL( opus_int16, LPC_res ); - const opus_int16 *x_ptr; - opus_int32 tmp32; - SAVE_STACK; - - x_ptr = x; - offset = LPC_order + subfr_length; - - /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ - ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 ); - silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); - for( i = 0; i < nb_subfr >> 1; i++ ) { - /* Calculate half frame LPC residual signal including preceding samples */ - silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch ); - - /* Point to first subframe of the just calculated LPC residual signal */ - LPC_res_ptr = LPC_res + LPC_order; - for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) { - /* Measure subframe energy */ - silk_sum_sqr_shift( &nrgs[ i * ( MAX_NB_SUBFR >> 1 ) + j ], &rshift, LPC_res_ptr, subfr_length ); - - /* Set Q values for the measured energy */ - nrgsQ[ i * ( MAX_NB_SUBFR >> 1 ) + j ] = -rshift; - - /* Move to next subframe */ - LPC_res_ptr += offset; - } - /* Move to next frame half */ - x_ptr += ( MAX_NB_SUBFR >> 1 ) * offset; - } - - /* Apply the squared subframe gains */ - for( i = 0; i < nb_subfr; i++ ) { - /* Fully upscale gains and energies */ - lz1 = silk_CLZ32( nrgs[ i ] ) - 1; - lz2 = silk_CLZ32( gains[ i ] ) - 1; - - tmp32 = silk_LSHIFT32( gains[ i ], lz2 ); - - /* Find squared gains */ - tmp32 = silk_SMMUL( tmp32, tmp32 ); /* Q( 2 * lz2 - 32 )*/ - - /* Scale energies */ - nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/ - nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32; - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/schur64_FIX.c b/thirdparty/opus/silk/fixed/schur64_FIX.c deleted file mode 100644 index 764a10ef3e..0000000000 --- a/thirdparty/opus/silk/fixed/schur64_FIX.c +++ /dev/null @@ -1,92 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Slower than schur(), but more accurate. */ -/* Uses SMULL(), available on armv4 */ -opus_int32 silk_schur64( /* O returns residual energy */ - opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */ - const opus_int32 c[], /* I Correlations [order+1] */ - opus_int32 order /* I Prediction order */ -) -{ - opus_int k, n; - opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31; - - silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); - - /* Check for invalid input */ - if( c[ 0 ] <= 0 ) { - silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) ); - return 0; - } - - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } - - for( k = 0; k < order; k++ ) { - /* Check that we won't be getting an unstable rc, otherwise stop here. */ - if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) { - if ( C[ k + 1 ][ 0 ] > 0 ) { - rc_Q16[ k ] = -SILK_FIX_CONST( .99f, 16 ); - } else { - rc_Q16[ k ] = SILK_FIX_CONST( .99f, 16 ); - } - k++; - break; - } - - /* Get reflection coefficient: divide two Q30 values and get result in Q31 */ - rc_tmp_Q31 = silk_DIV32_varQ( -C[ k + 1 ][ 0 ], C[ 0 ][ 1 ], 31 ); - - /* Save the output */ - rc_Q16[ k ] = silk_RSHIFT_ROUND( rc_tmp_Q31, 15 ); - - /* Update correlations */ - for( n = 0; n < order - k; n++ ) { - Ctmp1_Q30 = C[ n + k + 1 ][ 0 ]; - Ctmp2_Q30 = C[ n ][ 1 ]; - - /* Multiply and add the highest int32 */ - C[ n + k + 1 ][ 0 ] = Ctmp1_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp2_Q30, 1 ), rc_tmp_Q31 ); - C[ n ][ 1 ] = Ctmp2_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp1_Q30, 1 ), rc_tmp_Q31 ); - } - } - - for(; k < order; k++ ) { - rc_Q16[ k ] = 0; - } - - return silk_max_32( 1, C[ 0 ][ 1 ] ); -} diff --git a/thirdparty/opus/silk/fixed/schur_FIX.c b/thirdparty/opus/silk/fixed/schur_FIX.c deleted file mode 100644 index c4c0ef23b4..0000000000 --- a/thirdparty/opus/silk/fixed/schur_FIX.c +++ /dev/null @@ -1,106 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Faster than schur64(), but much less accurate. */ -/* uses SMLAWB(), requiring armv5E and higher. */ -opus_int32 silk_schur( /* O Returns residual energy */ - opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */ - const opus_int32 *c, /* I correlations [order+1] */ - const opus_int32 order /* I prediction order */ -) -{ - opus_int k, n, lz; - opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - opus_int32 Ctmp1, Ctmp2, rc_tmp_Q15; - - silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); - - /* Get number of leading zeros */ - lz = silk_CLZ32( c[ 0 ] ); - - /* Copy correlations and adjust level to Q30 */ - if( lz < 2 ) { - /* lz must be 1, so shift one to the right */ - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 ); - } - } else if( lz > 2 ) { - /* Shift to the left */ - lz -= 2; - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz ); - } - } else { - /* No need to shift */ - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } - } - - for( k = 0; k < order; k++ ) { - /* Check that we won't be getting an unstable rc, otherwise stop here. */ - if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) { - if ( C[ k + 1 ][ 0 ] > 0 ) { - rc_Q15[ k ] = -SILK_FIX_CONST( .99f, 15 ); - } else { - rc_Q15[ k ] = SILK_FIX_CONST( .99f, 15 ); - } - k++; - break; - } - - /* Get reflection coefficient */ - rc_tmp_Q15 = -silk_DIV32_16( C[ k + 1 ][ 0 ], silk_max_32( silk_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) ); - - /* Clip (shouldn't happen for properly conditioned inputs) */ - rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 ); - - /* Store */ - rc_Q15[ k ] = (opus_int16)rc_tmp_Q15; - - /* Update correlations */ - for( n = 0; n < order - k; n++ ) { - Ctmp1 = C[ n + k + 1 ][ 0 ]; - Ctmp2 = C[ n ][ 1 ]; - C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 ); - C[ n ][ 1 ] = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 ); - } - } - - for(; k < order; k++ ) { - rc_Q15[ k ] = 0; - } - - /* return residual energy */ - return silk_max_32( 1, C[ 0 ][ 1 ] ); -} diff --git a/thirdparty/opus/silk/fixed/solve_LS_FIX.c b/thirdparty/opus/silk/fixed/solve_LS_FIX.c deleted file mode 100644 index 51d7d49d02..0000000000 --- a/thirdparty/opus/silk/fixed/solve_LS_FIX.c +++ /dev/null @@ -1,249 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/*****************************/ -/* Internal function headers */ -/*****************************/ - -typedef struct { - opus_int32 Q36_part; - opus_int32 Q48_part; -} inv_D_t; - -/* Factorize square matrix A into LDL form */ -static OPUS_INLINE void silk_LDL_factorize_FIX( - opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ - inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ -); - -/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveFirst_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -); - -/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveLast_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - const opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -); - -static OPUS_INLINE void silk_LS_divide_Q16_FIX( - opus_int32 T[], /* I/O Numenator vector */ - inv_D_t *inv_D, /* I 1 / D vector */ - opus_int M /* I dimension */ -); - -/* Solves Ax = b, assuming A is symmetric */ -void silk_solve_LDL_FIX( - opus_int32 *A, /* I Pointer to symetric square matrix A */ - opus_int M, /* I Size of matrix */ - const opus_int32 *b, /* I Pointer to b vector */ - opus_int32 *x_Q16 /* O Pointer to x solution vector */ -) -{ - VARDECL( opus_int32, L_Q16 ); - opus_int32 Y[ MAX_MATRIX_SIZE ]; - inv_D_t inv_D[ MAX_MATRIX_SIZE ]; - SAVE_STACK; - - silk_assert( M <= MAX_MATRIX_SIZE ); - ALLOC( L_Q16, M * M, opus_int32 ); - - /*************************************************** - Factorize A by LDL such that A = L*D*L', - where L is lower triangular with ones on diagonal - ****************************************************/ - silk_LDL_factorize_FIX( A, M, L_Q16, inv_D ); - - /**************************************************** - * substitute D*L'*x = Y. ie: - L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b - ******************************************************/ - silk_LS_SolveFirst_FIX( L_Q16, M, b, Y ); - - /**************************************************** - D*L'*x = Y <=> L'*x = inv(D)*Y, because D is - diagonal just multiply with 1/d_i - ****************************************************/ - silk_LS_divide_Q16_FIX( Y, inv_D, M ); - - /**************************************************** - x = inv(L') * inv(D) * Y - *****************************************************/ - silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); - RESTORE_STACK; -} - -static OPUS_INLINE void silk_LDL_factorize_FIX( - opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ - inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ -) -{ - opus_int i, j, k, status, loop_count; - const opus_int32 *ptr1, *ptr2; - opus_int32 diag_min_value, tmp_32, err; - opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; - opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; - - silk_assert( M <= MAX_MATRIX_SIZE ); - - status = 1; - diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); - for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { - status = 0; - for( j = 0; j < M; j++ ) { - ptr1 = matrix_adr( L_Q16, j, 0, M ); - tmp_32 = 0; - for( i = 0; i < j; i++ ) { - v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ - tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ - } - tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); - - if( tmp_32 < diag_min_value ) { - tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); - /* Matrix not positive semi-definite, or ill conditioned */ - for( i = 0; i < M; i++ ) { - matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 ); - } - status = 1; - break; - } - D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ - - /* two-step division */ - one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ - one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ - err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ - one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ - - /* Save 1/Ds */ - inv_D[ j ].Q36_part = one_div_diag_Q36; - inv_D[ j ].Q48_part = one_div_diag_Q48; - - matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ - ptr1 = matrix_adr( A, j, 0, M ); - ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); - for( i = j + 1; i < M; i++ ) { - tmp_32 = 0; - for( k = 0; k < j; k++ ) { - tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ - } - tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ - - /* tmp_32 / D_Q0[j] : Divide to Q16 */ - matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), - silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); - - /* go to next column */ - ptr2 += M; - } - } - } - - silk_assert( status == 0 ); -} - -static OPUS_INLINE void silk_LS_divide_Q16_FIX( - opus_int32 T[], /* I/O Numenator vector */ - inv_D_t *inv_D, /* I 1 / D vector */ - opus_int M /* I dimension */ -) -{ - opus_int i; - opus_int32 tmp_32; - opus_int32 one_div_diag_Q36, one_div_diag_Q48; - - for( i = 0; i < M; i++ ) { - one_div_diag_Q36 = inv_D[ i ].Q36_part; - one_div_diag_Q48 = inv_D[ i ].Q48_part; - - tmp_32 = T[ i ]; - T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); - } -} - -/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveFirst_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -) -{ - opus_int i, j; - const opus_int32 *ptr32; - opus_int32 tmp_32; - - for( i = 0; i < M; i++ ) { - ptr32 = matrix_adr( L_Q16, i, 0, M ); - tmp_32 = 0; - for( j = 0; j < i; j++ ) { - tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); - } - x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); - } -} - -/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveLast_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - const opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -) -{ - opus_int i, j; - const opus_int32 *ptr32; - opus_int32 tmp_32; - - for( i = M - 1; i >= 0; i-- ) { - ptr32 = matrix_adr( L_Q16, 0, i, M ); - tmp_32 = 0; - for( j = M - 1; j > i; j-- ) { - tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] ); - } - x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); - } -} diff --git a/thirdparty/opus/silk/fixed/structs_FIX.h b/thirdparty/opus/silk/fixed/structs_FIX.h deleted file mode 100644 index 3294b25128..0000000000 --- a/thirdparty/opus/silk/fixed/structs_FIX.h +++ /dev/null @@ -1,134 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_STRUCTS_FIX_H -#define SILK_STRUCTS_FIX_H - -#include "typedef.h" -#include "main.h" -#include "structs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/********************************/ -/* Noise shaping analysis state */ -/********************************/ -typedef struct { - opus_int8 LastGainIndex; - opus_int32 HarmBoost_smth_Q16; - opus_int32 HarmShapeGain_smth_Q16; - opus_int32 Tilt_smth_Q16; -} silk_shape_state_FIX; - -/********************************/ -/* Prefilter state */ -/********************************/ -typedef struct { - opus_int16 sLTP_shp[ LTP_BUF_LENGTH ]; - opus_int32 sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int sLTP_shp_buf_idx; - opus_int32 sLF_AR_shp_Q12; - opus_int32 sLF_MA_shp_Q12; - opus_int32 sHarmHP_Q2; - opus_int32 rand_seed; - opus_int lagPrev; -} silk_prefilter_state_FIX; - -/********************************/ -/* Encoder state FIX */ -/********************************/ -typedef struct { - silk_encoder_state sCmn; /* Common struct, shared with floating-point code */ - silk_shape_state_FIX sShape; /* Shape state */ - silk_prefilter_state_FIX sPrefilt; /* Prefilter State */ - - /* Buffer for find pitch and noise shape analysis */ - silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ - opus_int LTPCorr_Q15; /* Normalized correlation from pitch lag estimator */ -} silk_encoder_state_FIX; - -/************************/ -/* Encoder control FIX */ -/************************/ -typedef struct { - /* Prediction and coding parameters */ - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; - opus_int LTP_scale_Q14; - opus_int pitchL[ MAX_NB_SUBFR ]; - - /* Noise shaping parameters */ - /* Testing */ - silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ - opus_int GainsPre_Q14[ MAX_NB_SUBFR ]; - opus_int HarmBoost_Q14[ MAX_NB_SUBFR ]; - opus_int Tilt_Q14[ MAX_NB_SUBFR ]; - opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; - opus_int Lambda_Q10; - opus_int input_quality_Q14; - opus_int coding_quality_Q14; - - /* measures */ - opus_int sparseness_Q8; - opus_int32 predGain_Q16; - opus_int LTPredCodGain_Q7; - opus_int32 ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ - opus_int ResNrgQ[ MAX_NB_SUBFR ]; /* Q domain for the residual energy > 0 */ - - /* Parameters for CBR mode */ - opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ]; - opus_int8 lastGainIndexPrev; -} silk_encoder_control_FIX; - -/************************/ -/* Encoder Super Struct */ -/************************/ -typedef struct { - silk_encoder_state_FIX state_Fxx[ ENCODER_NUM_CHANNELS ]; - stereo_enc_state sStereo; - opus_int32 nBitsUsedLBRR; - opus_int32 nBitsExceeded; - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int nPrevChannelsInternal; - opus_int timeSinceSwitchAllowed_ms; - opus_int allowBandwidthSwitch; - opus_int prev_decode_only_middle; -} silk_encoder; - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/fixed/vector_ops_FIX.c b/thirdparty/opus/silk/fixed/vector_ops_FIX.c deleted file mode 100644 index d94980014f..0000000000 --- a/thirdparty/opus/silk/fixed/vector_ops_FIX.c +++ /dev/null @@ -1,102 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "pitch.h" - -/* Copy and multiply a vector by a constant */ -void silk_scale_copy_vector16( - opus_int16 *data_out, - const opus_int16 *data_in, - opus_int32 gain_Q16, /* I Gain in Q16 */ - const opus_int dataSize /* I Length */ -) -{ - opus_int i; - opus_int32 tmp32; - - for( i = 0; i < dataSize; i++ ) { - tmp32 = silk_SMULWB( gain_Q16, data_in[ i ] ); - data_out[ i ] = (opus_int16)silk_CHECK_FIT16( tmp32 ); - } -} - -/* Multiply a vector by a constant */ -void silk_scale_vector32_Q26_lshift_18( - opus_int32 *data1, /* I/O Q0/Q18 */ - opus_int32 gain_Q26, /* I Q26 */ - opus_int dataSize /* I length */ -) -{ - opus_int i; - - for( i = 0; i < dataSize; i++ ) { - data1[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( silk_SMULL( data1[ i ], gain_Q26 ), 8 ) ); /* OUTPUT: Q18 */ - } -} - -/* sum = for(i=0;i6, memory access can be reduced by half. */ -opus_int32 silk_inner_prod_aligned( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int len, /* I vector lengths */ - int arch /* I Run-time architecture */ -) -{ -#ifdef FIXED_POINT - return celt_inner_prod(inVec1, inVec2, len, arch); -#else - opus_int i; - opus_int32 sum = 0; - for( i = 0; i < len; i++ ) { - sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); - } - return sum; -#endif -} - -opus_int64 silk_inner_prod16_aligned_64_c( - const opus_int16 *inVec1, /* I input vector 1 */ - const opus_int16 *inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ -) -{ - opus_int i; - opus_int64 sum = 0; - for( i = 0; i < len; i++ ) { - sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] ); - } - return sum; -} diff --git a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c b/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c deleted file mode 100644 index 6ca6c1184d..0000000000 --- a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c +++ /dev/null @@ -1,95 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -#define QC 10 -#define QS 14 - -#if defined(MIPSr1_ASM) -#include "mips/warped_autocorrelation_FIX_mipsr1.h" -#endif - - -#ifndef OVERRIDE_silk_warped_autocorrelation_FIX -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FIX( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - opus_int n, i, lsh; - opus_int32 tmp1_QS, tmp2_QS; - opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - silk_assert( 2 * QS - QC >= 0 ); - - /* Loop over samples */ - for( n = 0; n < length; n++ ) { - tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - state_QS[ i ] = tmp1_QS; - corr_QC[ i ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); - /* Output of allpass section */ - tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - state_QS[ i + 1 ] = tmp2_QS; - corr_QC[ i + 1 ] += silk_RSHIFT64( silk_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC ); - } - state_QS[ order ] = tmp1_QS; - corr_QC[ order ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); - } - - lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35; - lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); - *scale = -( QC + lsh ); - silk_assert( *scale >= -30 && *scale <= 12 ); - if( lsh >= 0 ) { - for( i = 0; i < order + 1; i++ ) { - corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QC[ i ], lsh ) ); - } - } else { - for( i = 0; i < order + 1; i++ ) { - corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QC[ i ], -lsh ) ); - } - } - silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ -} -#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */ diff --git a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c deleted file mode 100644 index 3c3583c5fc..0000000000 --- a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c +++ /dev/null @@ -1,377 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include - -#include "SigProc_FIX.h" -#include "define.h" -#include "tuning_parameters.h" -#include "pitch.h" -#include "celt/x86/x86cpu.h" - -#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ - -#define QA 25 -#define N_BITS_HEAD_ROOM 2 -#define MIN_RSHIFTS -16 -#define MAX_RSHIFTS (32 - QA) - -/* Compute reflection coefficients from input signal */ -void silk_burg_modified_sse4_1( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -) -{ - opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; - opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; - const opus_int16 *x_ptr; - opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; - - __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; - __m128i CONST1 = _mm_set1_epi32(1); - - silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); - - /* Compute autocorrelations, added over subframes */ - silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); - if( rshifts > MAX_RSHIFTS ) { - C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); - silk_assert( C0 > 0 ); - rshifts = MAX_RSHIFTS; - } else { - lz = silk_CLZ32( C0 ) - 1; - rshifts_extra = N_BITS_HEAD_ROOM - lz; - if( rshifts_extra > 0 ) { - rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); - C0 = silk_RSHIFT32( C0, rshifts_extra ); - } else { - rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); - C0 = silk_LSHIFT32( C0, -rshifts_extra ); - } - rshifts += rshifts_extra; - } - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - int i; - opus_int32 d; - x_ptr = x + s * subfr_length; - celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); - for( n = 1; n < D + 1; n++ ) { - for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) - d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); - xcorr[ n - 1 ] += d; - } - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); - } - } - } - silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - - /* Initialize */ - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - - invGain_Q30 = (opus_int32)1 << 30; - reached_max_gain = 0; - for( n = 0; n < D; n++ ) { - /* Update first row of correlation matrix (without first element) */ - /* Update last row of correlation matrix (without last element, stored in reversed order) */ - /* Update C * Af */ - /* Update C * flipud(Af) (stored in reversed order) */ - if( rshifts > -2 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ - for( k = 0; k < n; k++ ) { - C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp_QA = Af_QA[ k ]; - tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ - tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ - } - tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ - tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ - for( k = 0; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ - - X1_3210 = _mm_set1_epi32( x1 ); - X2_3210 = _mm_set1_epi32( x2 ); - TMP1_3210 = _mm_setzero_si128(); - TMP2_3210 = _mm_setzero_si128(); - for( k = 0; k < n - 3; k += 4 ) { - PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] ); - SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] ); - FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] ); - PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] ); - ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] ); - - T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 ); - T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 ); - - ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 ); - ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 ); - ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 ); - - FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 ); - LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 ); - - PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 ); - SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 ); - - _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 ); - _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 ); - - TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 ); - TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 ); - } - - TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) ); - TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) ); - TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) ); - TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) ); - - tmp1 += _mm_cvtsi128_si32( TMP1_3210 ); - tmp2 += _mm_cvtsi128_si32( TMP2_3210 ); - - for( ; k < n; k++ ) { - C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ - } - - tmp1 = -tmp1; /* Q17 */ - tmp2 = -tmp2; /* Q17 */ - - { - __m128i xmm_tmp1, xmm_tmp2; - __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; - __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; - - xmm_tmp1 = _mm_set1_epi32( tmp1 ); - xmm_tmp2 = _mm_set1_epi32( tmp2 ); - - for( k = 0; k <= n - 3; k += 4 ) { - xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] ); - xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] ); - - xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 ); - xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 ); - - /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ - xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 ); - xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 ); - xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 ); - xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 ); - - xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 ); - xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 ); - xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 ); - xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 ); - - xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC ); - xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC ); - - X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] ); - PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] ); - - X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 ); - PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 ); - - _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 ); - _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 ); - } - - for( ; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, - silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, - silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ - } - } - } - } - - /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ - tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ - tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ - num = 0; /* Q( -rshifts ) */ - nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ - for( k = 0; k < n; k++ ) { - Atmp_QA = Af_QA[ k ]; - lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; - lz = silk_min( 32 - QA, lz ); - Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ - - tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), - Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ - } - CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ - CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ - num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ - num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ - - /* Calculate the next order reflection (parcor) coefficient */ - if( silk_abs( num ) < nrg ) { - rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); - } else { - rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; - } - - /* Update inverse prediction gain */ - tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); - if( tmp1 <= minInvGain_Q30 ) { - /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ - tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ - rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ - if( rc_Q31 > 0 ) { - /* Newton-Raphson iteration */ - rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ - rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ - if( num < 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc_Q31 = -rc_Q31; - } - } - invGain_Q30 = minInvGain_Q30; - reached_max_gain = 1; - } else { - invGain_Q30 = tmp1; - } - - /* Update the AR coefficients */ - for( k = 0; k < (n + 1) >> 1; k++ ) { - tmp1 = Af_QA[ k ]; /* QA */ - tmp2 = Af_QA[ n - k - 1 ]; /* QA */ - Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ - Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ - } - Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ - - if( reached_max_gain ) { - /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ - for( k = n + 1; k < D; k++ ) { - Af_QA[ k ] = 0; - } - break; - } - - /* Update C * Af and C * Ab */ - for( k = 0; k <= n + 1; k++ ) { - tmp1 = CAf[ k ]; /* Q( -rshifts ) */ - tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ - CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - } - } - - if( reached_max_gain ) { - for( k = 0; k < D; k++ ) { - /* Scale coefficients */ - A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); - } - /* Subtract energy of preceding samples from C0 */ - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts ); - } - } - /* Approximate residual energy */ - *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); - *res_nrg_Q = -rshifts; - } else { - /* Return residual energy */ - nrg = CAf[ 0 ]; /* Q( -rshifts ) */ - tmp1 = (opus_int32)1 << 16; /* Q16 */ - for( k = 0; k < D; k++ ) { - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ - nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ - tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ - A_Q16[ k ] = -Atmp1; - } - *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ - *res_nrg_Q = -rshifts; - } -} diff --git a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c deleted file mode 100644 index 488a603f5d..0000000000 --- a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "main.h" -#include "celt/x86/x86cpu.h" - -void silk_warped_LPC_analysis_filter_FIX_sse4_1( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - opus_int32 acc_Q11, tmp1, tmp2; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - if (order == 10) - { - if (0 == lambda_Q16) - { - __m128i coef_Q13_3210, coef_Q13_7654; - __m128i coef_Q13_0123, coef_Q13_4567; - __m128i state_0123, state_4567; - __m128i xmm_product1, xmm_product2; - __m128i xmm_tempa, xmm_tempb; - - register opus_int32 sum; - register opus_int32 state_8, state_9, state_a; - register opus_int64 coef_Q13_8, coef_Q13_9; - - silk_assert( length > 0 ); - - coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] ); - coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] ); - - coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - coef_Q13_8 = (opus_int64) coef_Q13[ 8 ]; - coef_Q13_9 = (opus_int64) coef_Q13[ 9 ]; - - state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) ); - state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) ); - - state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - state_8 = state[ 8 ]; - state_9 = state[ 9 ]; - state_a = 0; - - for( n = 0; n < length; n++ ) - { - xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */ - xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 ); - - xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */ - xmm_product2 = _mm_srli_epi64( xmm_product2, 16 ); - - xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa ); - xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb ); - - xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 ); - xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 ); - - xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 ); - xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 ); - xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb ); - - sum = (coef_Q13_8 * state_8) >> 16; - sum += (coef_Q13_9 * state_9) >> 16; - - xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum += _mm_cvtsi128_si32( xmm_tempa); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9); - - /* move right */ - state_a = state_9; - state_9 = state_8; - state_8 = _mm_cvtsi128_si32( state_4567 ); - state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 ); - - state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 ); - } - - _mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); - _mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); - state[ 8 ] = state_8; - state[ 9 ] = state_9; - state[ 10 ] = state_a; - - return; - } - } - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state[ 1 ] = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state[ i ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state[ i + 1 ] = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - } - state[ order ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - } -} diff --git a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c deleted file mode 100644 index c1e90564d0..0000000000 --- a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "main.h" - -#include "SigProc_FIX.h" -#include "pitch.h" - -opus_int64 silk_inner_prod16_aligned_64_sse4_1( - const opus_int16 *inVec1, /* I input vector 1 */ - const opus_int16 *inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ -) -{ - opus_int i, dataSize8; - opus_int64 sum; - - __m128i xmm_tempa; - __m128i inVec1_76543210, acc1; - __m128i inVec2_76543210, acc2; - - sum = 0; - dataSize8 = len & ~7; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for( i = 0; i < dataSize8; i += 8 ) { - inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) ); - inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) ); - - /* only when all 4 operands are -32768 (0x8000), this results in wrap around */ - inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 ); - - xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 ); - /* equal shift right 8 bytes */ - inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 ); - - acc1 = _mm_add_epi64( acc1, xmm_tempa ); - acc2 = _mm_add_epi64( acc2, inVec1_76543210 ); - } - - acc1 = _mm_add_epi64( acc1, acc2 ); - - /* equal shift right 8 bytes */ - acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - acc1 = _mm_add_epi64( acc1, acc2 ); - - _mm_storel_epi64( (__m128i *)&sum, acc1 ); - - for( ; i < len; i++ ) { - sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); - } - - return sum; -} diff --git a/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c b/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c deleted file mode 100644 index cae89a0a18..0000000000 --- a/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c +++ /dev/null @@ -1,249 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "main_FLP.h" - -/************************************************/ -/* LPC analysis filter */ -/* NB! State is kept internally and the */ -/* filter always starts with zero state */ -/* first Order output samples are set to zero */ -/************************************************/ - -/* 16th order LPC analysis filter, does not write first 16 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter16_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 16; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ] + - s_ptr[ -8 ] * PredCoef[ 8 ] + - s_ptr[ -9 ] * PredCoef[ 9 ] + - s_ptr[ -10 ] * PredCoef[ 10 ] + - s_ptr[ -11 ] * PredCoef[ 11 ] + - s_ptr[ -12 ] * PredCoef[ 12 ] + - s_ptr[ -13 ] * PredCoef[ 13 ] + - s_ptr[ -14 ] * PredCoef[ 14 ] + - s_ptr[ -15 ] * PredCoef[ 15 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 12th order LPC analysis filter, does not write first 12 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter12_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 12; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ] + - s_ptr[ -8 ] * PredCoef[ 8 ] + - s_ptr[ -9 ] * PredCoef[ 9 ] + - s_ptr[ -10 ] * PredCoef[ 10 ] + - s_ptr[ -11 ] * PredCoef[ 11 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 10th order LPC analysis filter, does not write first 10 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter10_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 10; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ] + - s_ptr[ -8 ] * PredCoef[ 8 ] + - s_ptr[ -9 ] * PredCoef[ 9 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 8th order LPC analysis filter, does not write first 8 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter8_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 8; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 6th order LPC analysis filter, does not write first 6 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter6_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 6; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/************************************************/ -/* LPC analysis filter */ -/* NB! State is kept internally and the */ -/* filter always starts with zero state */ -/* first Order output samples are set to zero */ -/************************************************/ -void silk_LPC_analysis_filter_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length, /* I Length of input signal */ - const opus_int Order /* I LPC order */ -) -{ - silk_assert( Order <= length ); - - switch( Order ) { - case 6: - silk_LPC_analysis_filter6_FLP( r_LPC, PredCoef, s, length ); - break; - - case 8: - silk_LPC_analysis_filter8_FLP( r_LPC, PredCoef, s, length ); - break; - - case 10: - silk_LPC_analysis_filter10_FLP( r_LPC, PredCoef, s, length ); - break; - - case 12: - silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length ); - break; - - case 16: - silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length ); - break; - - default: - silk_assert( 0 ); - break; - } - - /* Set first Order output samples to zero */ - silk_memset( r_LPC, 0, Order * sizeof( silk_float ) ); -} - diff --git a/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c b/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c deleted file mode 100644 index 25178bacdd..0000000000 --- a/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "SigProc_FLP.h" - -#define RC_THRESHOLD 0.9999f - -/* compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -/* this code is based on silk_a2k_FLP() */ -silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */ - const silk_float *A, /* I prediction coefficients [order] */ - opus_int32 order /* I prediction order */ -) -{ - opus_int k, n; - double invGain, rc, rc_mult1, rc_mult2; - silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ]; - silk_float *Aold, *Anew; - - Anew = Atmp[ order & 1 ]; - silk_memcpy( Anew, A, order * sizeof(silk_float) ); - - invGain = 1.0; - for( k = order - 1; k > 0; k-- ) { - rc = -Anew[ k ]; - if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { - return 0.0f; - } - rc_mult1 = 1.0f - rc * rc; - rc_mult2 = 1.0f / rc_mult1; - invGain *= rc_mult1; - /* swap pointers */ - Aold = Anew; - Anew = Atmp[ k & 1 ]; - for( n = 0; n < k; n++ ) { - Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 ); - } - } - rc = -Anew[ 0 ]; - if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { - return 0.0f; - } - rc_mult1 = 1.0f - rc * rc; - invGain *= rc_mult1; - return (silk_float)invGain; -} diff --git a/thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c b/thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c deleted file mode 100644 index 849b7c1c52..0000000000 --- a/thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c +++ /dev/null @@ -1,75 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -void silk_LTP_analysis_filter_FLP( - silk_float *LTP_res, /* O LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */ - const silk_float *x, /* I Input signal, with preceding samples */ - const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int pre_length /* I Preceding samples for each subframe */ -) -{ - const silk_float *x_ptr, *x_lag_ptr; - silk_float Btmp[ LTP_ORDER ]; - silk_float *LTP_res_ptr; - silk_float inv_gain; - opus_int k, i, j; - - x_ptr = x; - LTP_res_ptr = LTP_res; - for( k = 0; k < nb_subfr; k++ ) { - x_lag_ptr = x_ptr - pitchL[ k ]; - inv_gain = invGains[ k ]; - for( i = 0; i < LTP_ORDER; i++ ) { - Btmp[ i ] = B[ k * LTP_ORDER + i ]; - } - - /* LTP analysis FIR filter */ - for( i = 0; i < subfr_length + pre_length; i++ ) { - LTP_res_ptr[ i ] = x_ptr[ i ]; - /* Subtract long-term prediction */ - for( j = 0; j < LTP_ORDER; j++ ) { - LTP_res_ptr[ i ] -= Btmp[ j ] * x_lag_ptr[ LTP_ORDER / 2 - j ]; - } - LTP_res_ptr[ i ] *= inv_gain; - x_lag_ptr++; - } - - /* Update pointers */ - LTP_res_ptr += subfr_length + pre_length; - x_ptr += subfr_length; - } -} diff --git a/thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c b/thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c deleted file mode 100644 index 8dbe29d0fa..0000000000 --- a/thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c +++ /dev/null @@ -1,52 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -void silk_LTP_scale_ctrl_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int round_loss; - - if( condCoding == CODE_INDEPENDENTLY ) { - /* Only scale if first frame in packet */ - round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; - psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f ); - } else { - /* Default is minimum scaling */ - psEnc->sCmn.indices.LTP_scaleIndex = 0; - } - - psEncCtrl->LTP_scale = (silk_float)silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ] / 16384.0f; -} diff --git a/thirdparty/opus/silk/float/SigProc_FLP.h b/thirdparty/opus/silk/float/SigProc_FLP.h deleted file mode 100644 index f0cb3733be..0000000000 --- a/thirdparty/opus/silk/float/SigProc_FLP.h +++ /dev/null @@ -1,204 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FLP_H -#define SILK_SIGPROC_FLP_H - -#include "SigProc_FIX.h" -#include "float_cast.h" -#include - -#ifdef __cplusplus -extern "C" -{ -#endif - -/********************************************************************/ -/* SIGNAL PROCESSING FUNCTIONS */ -/********************************************************************/ - -/* Chirp (bw expand) LP AR filter */ -void silk_bwexpander_FLP( - silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I length of ar */ - const silk_float chirp /* I chirp factor (typically in range (0..1) ) */ -); - -/* compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -/* this code is based on silk_FLP_a2k() */ -silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */ - const silk_float *A, /* I prediction coefficients [order] */ - opus_int32 order /* I prediction order */ -); - -silk_float silk_schur_FLP( /* O returns residual energy */ - silk_float refl_coef[], /* O reflection coefficients (length order) */ - const silk_float auto_corr[], /* I autocorrelation sequence (length order+1) */ - opus_int order /* I order */ -); - -void silk_k2a_FLP( - silk_float *A, /* O prediction coefficients [order] */ - const silk_float *rc, /* I reflection coefficients [order] */ - opus_int32 order /* I prediction order */ -); - -/* Solve the normal equations using the Levinson-Durbin recursion */ -silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ - silk_float A[], /* O prediction coefficients [order] */ - const silk_float corr[], /* I input auto-correlations [order + 1] */ - const opus_int order /* I prediction order */ -); - -/* compute autocorrelation */ -void silk_autocorrelation_FLP( - silk_float *results, /* O result (length correlationCount) */ - const silk_float *inputData, /* I input data to correlate */ - opus_int inputDataSize, /* I length of input */ - opus_int correlationCount /* I number of correlation taps to compute */ -); - -opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ - const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I Number of 5 ms subframes */ - int arch /* I Run-time architecture */ -); - -void silk_insertion_sort_decreasing_FLP( - silk_float *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -); - -/* Compute reflection coefficients from input signal */ -silk_float silk_burg_modified_FLP( /* O returns residual energy */ - silk_float A[], /* O prediction coefficients (length order) */ - const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */ - const silk_float minInvGain, /* I minimum inverse prediction gain */ - const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I number of subframes stacked in x */ - const opus_int D /* I order */ -); - -/* multiply a vector by a constant */ -void silk_scale_vector_FLP( - silk_float *data1, - silk_float gain, - opus_int dataSize -); - -/* copy and multiply a vector by a constant */ -void silk_scale_copy_vector_FLP( - silk_float *data_out, - const silk_float *data_in, - silk_float gain, - opus_int dataSize -); - -/* inner product of two silk_float arrays, with result as double */ -double silk_inner_product_FLP( - const silk_float *data1, - const silk_float *data2, - opus_int dataSize -); - -/* sum of squares of a silk_float array, with result as double */ -double silk_energy_FLP( - const silk_float *data, - opus_int dataSize -); - -/********************************************************************/ -/* MACROS */ -/********************************************************************/ - -#define PI (3.1415926536f) - -#define silk_min_float( a, b ) (((a) < (b)) ? (a) : (b)) -#define silk_max_float( a, b ) (((a) > (b)) ? (a) : (b)) -#define silk_abs_float( a ) ((silk_float)fabs(a)) - -/* sigmoid function */ -static OPUS_INLINE silk_float silk_sigmoid( silk_float x ) -{ - return (silk_float)(1.0 / (1.0 + exp(-x))); -} - -/* floating-point to integer conversion (rounding) */ -static OPUS_INLINE opus_int32 silk_float2int( silk_float x ) -{ - return (opus_int32)float2int( x ); -} - -/* floating-point to integer conversion (rounding) */ -static OPUS_INLINE void silk_float2short_array( - opus_int16 *out, - const silk_float *in, - opus_int32 length -) -{ - opus_int32 k; - for( k = length - 1; k >= 0; k-- ) { - out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) ); - } -} - -/* integer to floating-point conversion */ -static OPUS_INLINE void silk_short2float_array( - silk_float *out, - const opus_int16 *in, - opus_int32 length -) -{ - opus_int32 k; - for( k = length - 1; k >= 0; k-- ) { - out[k] = (silk_float)in[k]; - } -} - -/* using log2() helps the fixed-point conversion */ -static OPUS_INLINE silk_float silk_log2( double x ) -{ - return ( silk_float )( 3.32192809488736 * log10( x ) ); -} - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_SIGPROC_FLP_H */ diff --git a/thirdparty/opus/silk/float/apply_sine_window_FLP.c b/thirdparty/opus/silk/float/apply_sine_window_FLP.c deleted file mode 100644 index 6aae57c0ab..0000000000 --- a/thirdparty/opus/silk/float/apply_sine_window_FLP.c +++ /dev/null @@ -1,81 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Apply sine window to signal vector */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -void silk_apply_sine_window_FLP( - silk_float px_win[], /* O Pointer to windowed signal */ - const silk_float px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -) -{ - opus_int k; - silk_float freq, c, S0, S1; - - silk_assert( win_type == 1 || win_type == 2 ); - - /* Length must be multiple of 4 */ - silk_assert( ( length & 3 ) == 0 ); - - freq = PI / ( length + 1 ); - - /* Approximation of 2 * cos(f) */ - c = 2.0f - freq * freq; - - /* Initialize state */ - if( win_type < 2 ) { - /* Start from 0 */ - S0 = 0.0f; - /* Approximation of sin(f) */ - S1 = freq; - } else { - /* Start from 1 */ - S0 = 1.0f; - /* Approximation of cos(f) */ - S1 = 0.5f * c; - } - - /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */ - /* 4 samples at a time */ - for( k = 0; k < length; k += 4 ) { - px_win[ k + 0 ] = px[ k + 0 ] * 0.5f * ( S0 + S1 ); - px_win[ k + 1 ] = px[ k + 1 ] * S1; - S0 = c * S1 - S0; - px_win[ k + 2 ] = px[ k + 2 ] * 0.5f * ( S1 + S0 ); - px_win[ k + 3 ] = px[ k + 3 ] * S0; - S1 = c * S0 - S1; - } -} diff --git a/thirdparty/opus/silk/float/autocorrelation_FLP.c b/thirdparty/opus/silk/float/autocorrelation_FLP.c deleted file mode 100644 index 8b8a9e659a..0000000000 --- a/thirdparty/opus/silk/float/autocorrelation_FLP.c +++ /dev/null @@ -1,52 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "typedef.h" -#include "SigProc_FLP.h" - -/* compute autocorrelation */ -void silk_autocorrelation_FLP( - silk_float *results, /* O result (length correlationCount) */ - const silk_float *inputData, /* I input data to correlate */ - opus_int inputDataSize, /* I length of input */ - opus_int correlationCount /* I number of correlation taps to compute */ -) -{ - opus_int i; - - if( correlationCount > inputDataSize ) { - correlationCount = inputDataSize; - } - - for( i = 0; i < correlationCount; i++ ) { - results[ i ] = (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i ); - } -} diff --git a/thirdparty/opus/silk/float/burg_modified_FLP.c b/thirdparty/opus/silk/float/burg_modified_FLP.c deleted file mode 100644 index ea5dc25a93..0000000000 --- a/thirdparty/opus/silk/float/burg_modified_FLP.c +++ /dev/null @@ -1,186 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" -#include "tuning_parameters.h" -#include "define.h" - -#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/ - -/* Compute reflection coefficients from input signal */ -silk_float silk_burg_modified_FLP( /* O returns residual energy */ - silk_float A[], /* O prediction coefficients (length order) */ - const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */ - const silk_float minInvGain, /* I minimum inverse prediction gain */ - const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I number of subframes stacked in x */ - const opus_int D /* I order */ -) -{ - opus_int k, n, s, reached_max_gain; - double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2; - const silk_float *x_ptr; - double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ]; - double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ]; - double Af[ SILK_MAX_ORDER_LPC ]; - - silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); - - /* Compute autocorrelations, added over subframes */ - C0 = silk_energy_FLP( x, nb_subfr * subfr_length ); - silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) ); - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n ); - } - } - silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) ); - - /* Initialize */ - CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f; - invGain = 1.0f; - reached_max_gain = 0; - for( n = 0; n < D; n++ ) { - /* Update first row of correlation matrix (without first element) */ - /* Update last row of correlation matrix (without last element, stored in reversed order) */ - /* Update C * Af */ - /* Update C * flipud(Af) (stored in reversed order) */ - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - tmp1 = x_ptr[ n ]; - tmp2 = x_ptr[ subfr_length - n - 1 ]; - for( k = 0; k < n; k++ ) { - C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ]; - C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ]; - Atmp = Af[ k ]; - tmp1 += x_ptr[ n - k - 1 ] * Atmp; - tmp2 += x_ptr[ subfr_length - n + k ] * Atmp; - } - for( k = 0; k <= n; k++ ) { - CAf[ k ] -= tmp1 * x_ptr[ n - k ]; - CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ]; - } - } - tmp1 = C_first_row[ n ]; - tmp2 = C_last_row[ n ]; - for( k = 0; k < n; k++ ) { - Atmp = Af[ k ]; - tmp1 += C_last_row[ n - k - 1 ] * Atmp; - tmp2 += C_first_row[ n - k - 1 ] * Atmp; - } - CAf[ n + 1 ] = tmp1; - CAb[ n + 1 ] = tmp2; - - /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ - num = CAb[ n + 1 ]; - nrg_b = CAb[ 0 ]; - nrg_f = CAf[ 0 ]; - for( k = 0; k < n; k++ ) { - Atmp = Af[ k ]; - num += CAb[ n - k ] * Atmp; - nrg_b += CAb[ k + 1 ] * Atmp; - nrg_f += CAf[ k + 1 ] * Atmp; - } - silk_assert( nrg_f > 0.0 ); - silk_assert( nrg_b > 0.0 ); - - /* Calculate the next order reflection (parcor) coefficient */ - rc = -2.0 * num / ( nrg_f + nrg_b ); - silk_assert( rc > -1.0 && rc < 1.0 ); - - /* Update inverse prediction gain */ - tmp1 = invGain * ( 1.0 - rc * rc ); - if( tmp1 <= minInvGain ) { - /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ - rc = sqrt( 1.0 - minInvGain / invGain ); - if( num > 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc = -rc; - } - invGain = minInvGain; - reached_max_gain = 1; - } else { - invGain = tmp1; - } - - /* Update the AR coefficients */ - for( k = 0; k < (n + 1) >> 1; k++ ) { - tmp1 = Af[ k ]; - tmp2 = Af[ n - k - 1 ]; - Af[ k ] = tmp1 + rc * tmp2; - Af[ n - k - 1 ] = tmp2 + rc * tmp1; - } - Af[ n ] = rc; - - if( reached_max_gain ) { - /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ - for( k = n + 1; k < D; k++ ) { - Af[ k ] = 0.0; - } - break; - } - - /* Update C * Af and C * Ab */ - for( k = 0; k <= n + 1; k++ ) { - tmp1 = CAf[ k ]; - CAf[ k ] += rc * CAb[ n - k + 1 ]; - CAb[ n - k + 1 ] += rc * tmp1; - } - } - - if( reached_max_gain ) { - /* Convert to silk_float */ - for( k = 0; k < D; k++ ) { - A[ k ] = (silk_float)( -Af[ k ] ); - } - /* Subtract energy of preceding samples from C0 */ - for( s = 0; s < nb_subfr; s++ ) { - C0 -= silk_energy_FLP( x + s * subfr_length, D ); - } - /* Approximate residual energy */ - nrg_f = C0 * invGain; - } else { - /* Compute residual energy and store coefficients as silk_float */ - nrg_f = CAf[ 0 ]; - tmp1 = 1.0; - for( k = 0; k < D; k++ ) { - Atmp = Af[ k ]; - nrg_f += CAf[ k + 1 ] * Atmp; - tmp1 += Atmp * Atmp; - A[ k ] = (silk_float)(-Atmp); - } - nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1; - } - - /* Return residual energy */ - return (silk_float)nrg_f; -} diff --git a/thirdparty/opus/silk/float/bwexpander_FLP.c b/thirdparty/opus/silk/float/bwexpander_FLP.c deleted file mode 100644 index d55a4d79ab..0000000000 --- a/thirdparty/opus/silk/float/bwexpander_FLP.c +++ /dev/null @@ -1,49 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* Chirp (bw expand) LP AR filter */ -void silk_bwexpander_FLP( - silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I length of ar */ - const silk_float chirp /* I chirp factor (typically in range (0..1) ) */ -) -{ - opus_int i; - silk_float cfac = chirp; - - for( i = 0; i < d - 1; i++ ) { - ar[ i ] *= cfac; - cfac *= chirp; - } - ar[ d - 1 ] *= cfac; -} diff --git a/thirdparty/opus/silk/float/corrMatrix_FLP.c b/thirdparty/opus/silk/float/corrMatrix_FLP.c deleted file mode 100644 index eae6a1cfca..0000000000 --- a/thirdparty/opus/silk/float/corrMatrix_FLP.c +++ /dev/null @@ -1,93 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/********************************************************************** - * Correlation matrix computations for LS estimate. - **********************************************************************/ - -#include "main_FLP.h" - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FLP( - const silk_float *x, /* I x vector [L+order-1] used to create X */ - const silk_float *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vecors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *Xt /* O X'*t correlation vector [order] */ -) -{ - opus_int lag; - const silk_float *ptr1; - - ptr1 = &x[ Order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */ - for( lag = 0; lag < Order; lag++ ) { - /* Calculate X[:,lag]'*t */ - Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L ); - ptr1--; /* Next column of X */ - } -} - -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FLP( - const silk_float *x, /* I x vector [ L+order-1 ] used to create X */ - const opus_int L, /* I Length of vectors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *XX /* O X'*X correlation matrix [order x order] */ -) -{ - opus_int j, lag; - double energy; - const silk_float *ptr1, *ptr2; - - ptr1 = &x[ Order - 1 ]; /* First sample of column 0 of X */ - energy = silk_energy_FLP( ptr1, L ); /* X[:,0]'*X[:,0] */ - matrix_ptr( XX, 0, 0, Order ) = ( silk_float )energy; - for( j = 1; j < Order; j++ ) { - /* Calculate X[:,j]'*X[:,j] */ - energy += ptr1[ -j ] * ptr1[ -j ] - ptr1[ L - j ] * ptr1[ L - j ]; - matrix_ptr( XX, j, j, Order ) = ( silk_float )energy; - } - - ptr2 = &x[ Order - 2 ]; /* First sample of column 1 of X */ - for( lag = 1; lag < Order; lag++ ) { - /* Calculate X[:,0]'*X[:,lag] */ - energy = silk_inner_product_FLP( ptr1, ptr2, L ); - matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy; - matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy; - /* Calculate X[:,j]'*X[:,j + lag] */ - for( j = 1; j < ( Order - lag ); j++ ) { - energy += ptr1[ -j ] * ptr2[ -j ] - ptr1[ L - j ] * ptr2[ L - j ]; - matrix_ptr( XX, lag + j, j, Order ) = ( silk_float )energy; - matrix_ptr( XX, j, lag + j, Order ) = ( silk_float )energy; - } - ptr2--; /* Next column of X */ - } -} diff --git a/thirdparty/opus/silk/float/encode_frame_FLP.c b/thirdparty/opus/silk/float/encode_frame_FLP.c deleted file mode 100644 index 2092a4d9e2..0000000000 --- a/thirdparty/opus/silk/float/encode_frame_FLP.c +++ /dev/null @@ -1,372 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float xfw[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -); - -void silk_encode_do_VAD_FLP( - silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ -) -{ - /****************************/ - /* Voice Activity Detection */ - /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - - /**************************************************/ - /* Convert speech activity into VAD and DTX flags */ - /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { - psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; - psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.inDTX = 0; - } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; - psEnc->sCmn.inDTX = 0; - } - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; - } else { - psEnc->sCmn.noSpeechCounter = 0; - psEnc->sCmn.inDTX = 0; - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - } -} - -/****************/ -/* Encode frame */ -/****************/ -opus_int silk_encode_frame_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - opus_int32 *pnBytesOut, /* O Number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -) -{ - silk_encoder_control_FLP sEncCtrl; - opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; - silk_float *x_frame, *res_pitch_frame; - silk_float xfw[ MAX_FRAME_LENGTH ]; - silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ]; - ec_enc sRangeEnc_copy, sRangeEnc_copy2; - silk_nsq_state sNSQ_copy, sNSQ_copy2; - opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; - opus_int32 gainsID, gainsID_lower, gainsID_upper; - opus_int16 gainMult_Q8; - opus_int16 ec_prevLagIndex_copy; - opus_int ec_prevSignalType_copy; - opus_int8 LastGainIndex_copy2; - opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; - opus_uint8 ec_buf_copy[ 1275 ]; - - /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ - LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; - - psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; - - /**************************************************************/ - /* Set up Input Pointers, and insert frame in input buffer */ - /**************************************************************/ - /* pointers aligned with start of frame to encode */ - x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */ - res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */ - - /***************************************/ - /* Ensure smooth bandwidth transitions */ - /***************************************/ - silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); - - /*******************************************/ - /* Copy new frame to front of input buffer */ - /*******************************************/ - silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); - - /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */ - for( i = 0; i < 8; i++ ) { - x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f; - } - - if( !psEnc->sCmn.prefillFlag ) { - /*****************************************/ - /* Find pitch lags, initial LPC analysis */ - /*****************************************/ - silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); - - /************************/ - /* Noise shape analysis */ - /************************/ - silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); - - /***************************************************/ - /* Find linear prediction coefficients (LPC + LTP) */ - /***************************************************/ - silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); - - /****************************************/ - /* Process gains */ - /****************************************/ - silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding ); - - /*****************************************/ - /* Prefiltering for noise shaper */ - /*****************************************/ - silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame ); - - /****************************************/ - /* Low Bitrate Redundant Encoding */ - /****************************************/ - silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding ); - - /* Loop over quantizer and entroy coding to control bitrate */ - maxIter = 6; - gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); - found_lower = 0; - found_upper = 0; - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - gainsID_lower = -1; - gainsID_upper = -1; - /* Copy part of the input state */ - silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); - silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - seed_copy = psEnc->sCmn.indices.Seed; - ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; - ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; - for( iter = 0; ; iter++ ) { - if( gainsID == gainsID_lower ) { - nBits = nBits_lower; - } else if( gainsID == gainsID_upper ) { - nBits = nBits_upper; - } else { - /* Restore part of the input state */ - if( iter > 0 ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); - psEnc->sCmn.indices.Seed = seed_copy; - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - } - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw ); - - /****************************************/ - /* Encode Parameters */ - /****************************************/ - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - /****************************************/ - /* Encode Excitation Signal */ - /****************************************/ - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { - break; - } - } - - if( iter == maxIter ) { - if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { - /* Restore output state from earlier iteration that did meet the bitrate budget */ - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - silk_assert( sRangeEnc_copy2.offs <= 1275 ); - silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); - psEnc->sShape.LastGainIndex = LastGainIndex_copy2; - } - break; - } - - if( nBits > maxBits ) { - if( found_lower == 0 && iter >= 2 ) { - /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ - sEncCtrl.Lambda *= 1.5f; - found_upper = 0; - gainsID_upper = -1; - } else { - found_upper = 1; - nBits_upper = nBits; - gainMult_upper = gainMult_Q8; - gainsID_upper = gainsID; - } - } else if( nBits < maxBits - 5 ) { - found_lower = 1; - nBits_lower = nBits; - gainMult_lower = gainMult_Q8; - if( gainsID != gainsID_lower ) { - gainsID_lower = gainsID; - /* Copy part of the output state */ - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - silk_assert( psRangeEnc->offs <= 1275 ); - silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); - silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; - } - } else { - /* Within 5 bits of budget: close enough */ - break; - } - - if( ( found_lower & found_upper ) == 0 ) { - /* Adjust gain according to high-rate rate/distortion curve */ - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); - if( nBits > maxBits ) { - gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); - } - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); - } else { - /* Adjust gain by interpolating */ - gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower ); - /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ - if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); - } else - if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); - } - - /* Quantize gains */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, - &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Unique identifier of gains vector */ - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - - /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f; - } - } - } - - /* Update input buffer */ - silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], - ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) ); - - /* Exit without entropy coding */ - if( psEnc->sCmn.prefillFlag ) { - /* No payload */ - *pnBytesOut = 0; - return ret; - } - - /* Parameters needed for next frame */ - psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; - - /****************************************/ - /* Finalize payload */ - /****************************************/ - psEnc->sCmn.first_frame_after_reset = 0; - /* Payload size */ - *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); - - return ret; -} - -/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float xfw[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -) -{ - opus_int k; - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - silk_float TempGains[ MAX_NB_SUBFR ]; - SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; - silk_nsq_state sNSQ_LBRR; - - /*******************************************/ - /* Control use of inband LBRR */ - /*******************************************/ - if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { - psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - - /* Copy noise shaping quantizer state and quantization indices from regular encoding */ - silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); - - /* Save original gains */ - silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); - - if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { - /* First frame in packet or previous frame not LBRR coded */ - psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; - - /* Increase Gains to get target LBRR rate */ - psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases; - psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); - } - - /* Decode to get gains in sync with decoder */ - silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices, - &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f ); - } - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR, - psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw ); - - /* Restore original gains */ - silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); - } -} diff --git a/thirdparty/opus/silk/float/energy_FLP.c b/thirdparty/opus/silk/float/energy_FLP.c deleted file mode 100644 index 24b8179f9e..0000000000 --- a/thirdparty/opus/silk/float/energy_FLP.c +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* sum of squares of a silk_float array, with result as double */ -double silk_energy_FLP( - const silk_float *data, - opus_int dataSize -) -{ - opus_int i, dataSize4; - double result; - - /* 4x unrolled loop */ - result = 0.0; - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - result += data[ i + 0 ] * (double)data[ i + 0 ] + - data[ i + 1 ] * (double)data[ i + 1 ] + - data[ i + 2 ] * (double)data[ i + 2 ] + - data[ i + 3 ] * (double)data[ i + 3 ]; - } - - /* add any remaining products */ - for( ; i < dataSize; i++ ) { - result += data[ i ] * (double)data[ i ]; - } - - silk_assert( result >= 0.0 ); - return result; -} diff --git a/thirdparty/opus/silk/float/find_LPC_FLP.c b/thirdparty/opus/silk/float/find_LPC_FLP.c deleted file mode 100644 index fcfe1c3681..0000000000 --- a/thirdparty/opus/silk/float/find_LPC_FLP.c +++ /dev/null @@ -1,104 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "define.h" -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* LPC analysis */ -void silk_find_LPC_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const silk_float x[], /* I Input signal */ - const silk_float minInvGain /* I Inverse of max prediction gain */ -) -{ - opus_int k, subfr_length; - silk_float a[ MAX_LPC_ORDER ]; - - /* Used only for NLSF interpolation */ - silk_float res_nrg, res_nrg_2nd, res_nrg_interp; - opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; - silk_float a_tmp[ MAX_LPC_ORDER ]; - silk_float LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ]; - - subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; - - /* Default: No interpolation */ - psEncC->indices.NLSFInterpCoef_Q2 = 4; - - /* Burg AR analysis for the full frame */ - res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder ); - - if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { - /* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than */ - /* adding it to the residual energy of the first 10 ms in each iteration of the search below */ - res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder ); - - /* Convert to NLSFs */ - silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder ); - - /* Search over interpolation indices to find the one with lowest residual energy */ - res_nrg_2nd = silk_float_MAX; - for( k = 3; k >= 0; k-- ) { - /* Interpolate NLSFs for first half */ - silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); - - /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A_FLP( a_tmp, NLSF0_Q15, psEncC->predictLPCOrder ); - - /* Calculate residual energy with LSF interpolation */ - silk_LPC_analysis_filter_FLP( LPC_res, a_tmp, x, 2 * subfr_length, psEncC->predictLPCOrder ); - res_nrg_interp = (silk_float)( - silk_energy_FLP( LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ) + - silk_energy_FLP( LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ) ); - - /* Determine whether current interpolated NLSFs are best so far */ - if( res_nrg_interp < res_nrg ) { - /* Interpolation has lower residual energy */ - res_nrg = res_nrg_interp; - psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; - } else if( res_nrg_interp > res_nrg_2nd ) { - /* No reason to continue iterating - residual energies will continue to climb */ - break; - } - res_nrg_2nd = res_nrg_interp; - } - } - - if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { - /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ - silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder ); - } - - silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || - ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); -} diff --git a/thirdparty/opus/silk/float/find_LTP_FLP.c b/thirdparty/opus/silk/float/find_LTP_FLP.c deleted file mode 100644 index 7229996014..0000000000 --- a/thirdparty/opus/silk/float/find_LTP_FLP.c +++ /dev/null @@ -1,132 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -void silk_find_LTP_FLP( - silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - silk_float *LTPredCodGain, /* O LTP coding gain */ - const silk_float r_lpc[], /* I LPC residual */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset /* I Number of samples in LTP memory */ -) -{ - opus_int i, k; - silk_float *b_ptr, temp, *WLTP_ptr; - silk_float LPC_res_nrg, LPC_LTP_res_nrg; - silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ]; - silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu; - silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; - const silk_float *r_ptr, *lag_ptr; - - b_ptr = b; - WLTP_ptr = WLTP; - r_ptr = &r_lpc[ mem_offset ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - - silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr ); - silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr ); - - rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length ); - regu = 1.0f + rr[ k ] + - matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) + - matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ); - regu *= LTP_DAMPING / 3; - silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER ); - silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr ); - - /* Calculate residual energy */ - nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER ); - - temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); - silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER ); - w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER ); - - r_ptr += subfr_length; - b_ptr += LTP_ORDER; - WLTP_ptr += LTP_ORDER * LTP_ORDER; - } - - /* Compute LTP coding gain */ - if( LTPredCodGain != NULL ) { - LPC_LTP_res_nrg = 1e-6f; - LPC_res_nrg = 0.0f; - for( k = 0; k < nb_subfr; k++ ) { - LPC_res_nrg += rr[ k ] * Wght[ k ]; - LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ]; - } - - silk_assert( LPC_LTP_res_nrg > 0 ); - *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg ); - } - - /* Smoothing */ - /* d = sum( B, 1 ); */ - b_ptr = b; - for( k = 0; k < nb_subfr; k++ ) { - d[ k ] = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - d[ k ] += b_ptr[ i ]; - } - b_ptr += LTP_ORDER; - } - /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ - temp = 1e-3f; - for( k = 0; k < nb_subfr; k++ ) { - temp += w[ k ]; - } - m = 0; - for( k = 0; k < nb_subfr; k++ ) { - m += d[ k ] * w[ k ]; - } - m = m / temp; - - b_ptr = b; - for( k = 0; k < nb_subfr; k++ ) { - g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] ); - temp = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f ); - temp += delta_b[ i ]; - } - temp = g / temp; - for( i = 0; i < LTP_ORDER; i++ ) { - b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp; - } - b_ptr += LTP_ORDER; - } -} diff --git a/thirdparty/opus/silk/float/find_pitch_lags_FLP.c b/thirdparty/opus/silk/float/find_pitch_lags_FLP.c deleted file mode 100644 index f3b22d25ce..0000000000 --- a/thirdparty/opus/silk/float/find_pitch_lags_FLP.c +++ /dev/null @@ -1,132 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include "main_FLP.h" -#include "tuning_parameters.h" - -void silk_find_pitch_lags_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - silk_float res[], /* O Residual */ - const silk_float x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int buf_len; - silk_float thrhld, res_nrg; - const silk_float *x_buf_ptr, *x_buf; - silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; - silk_float A[ MAX_FIND_PITCH_LPC_ORDER ]; - silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ]; - silk_float Wsig[ FIND_PITCH_LPC_WIN_MAX ]; - silk_float *Wsig_ptr; - - /******************************************/ - /* Set up buffer lengths etc based on Fs */ - /******************************************/ - buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; - - /* Safety check */ - silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); - - x_buf = x - psEnc->sCmn.ltp_mem_length; - - /******************************************/ - /* Estimate LPC AR coeficients */ - /******************************************/ - - /* Calculate windowed signal */ - - /* First LA_LTP samples */ - x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; - Wsig_ptr = Wsig; - silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); - - /* Middle non-windowed samples */ - Wsig_ptr += psEnc->sCmn.la_pitch; - x_buf_ptr += psEnc->sCmn.la_pitch; - silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ) ) * sizeof( silk_float ) ); - - /* Last LA_LTP samples */ - Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); - x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); - silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); - - /* Calculate autocorrelation sequence */ - silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 ); - - /* Add white noise, as a fraction of the energy */ - auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1; - - /* Calculate the reflection coefficients using Schur */ - res_nrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Prediction gain */ - psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( res_nrg, 1.0f ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Bandwidth expansion */ - silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION ); - - /*****************************************/ - /* LPC analysis filtering */ - /*****************************************/ - silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder ); - - if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { - /* Threshold for pitch estimator */ - thrhld = 0.6f; - thrhld -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder; - thrhld -= 0.1f * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - thrhld -= 0.15f * (psEnc->sCmn.prevSignalType >> 1); - thrhld -= 0.1f * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ); - - /*****************************************/ - /* Call Pitch estimator */ - /*****************************************/ - if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, - &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f, - thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 ) - { - psEnc->sCmn.indices.signalType = TYPE_VOICED; - } else { - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - } - } else { - silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); - psEnc->sCmn.indices.lagIndex = 0; - psEnc->sCmn.indices.contourIndex = 0; - psEnc->LTPCorr = 0; - } -} diff --git a/thirdparty/opus/silk/float/find_pred_coefs_FLP.c b/thirdparty/opus/silk/float/find_pred_coefs_FLP.c deleted file mode 100644 index 1af4fe5f1b..0000000000 --- a/thirdparty/opus/silk/float/find_pred_coefs_FLP.c +++ /dev/null @@ -1,118 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Find LPC and LTP coefficients */ -void silk_find_pred_coefs_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float res_pitch[], /* I Residual from pitch analysis */ - const silk_float x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i; - silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; - silk_float invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ]; - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; - const silk_float *x_ptr; - silk_float *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ]; - silk_float minInvGain; - - /* Weighting for weighted least squares */ - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - silk_assert( psEncCtrl->Gains[ i ] > 0.0f ); - invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ]; - Wght[ i ] = invGains[ i ] * invGains[ i ]; - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /**********/ - /* VOICED */ - /**********/ - silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); - - /* LTP analysis */ - silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch, - psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length ); - - /* Quantize LTP gain parameters */ - silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, - psEnc->sCmn.arch ); - - /* Control LTP scaling */ - silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); - - /* Create LTP residual */ - silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef, - psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); - } else { - /************/ - /* UNVOICED */ - /************/ - /* Create signal with prepended subframes, scaled by inverse gains */ - x_ptr = x - psEnc->sCmn.predictLPCOrder; - x_pre_ptr = LPC_in_pre; - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ], - psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); - x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; - x_ptr += psEnc->sCmn.subfr_length; - } - silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) ); - psEncCtrl->LTPredCodGain = 0.0f; - psEnc->sCmn.sum_log_gain_Q7 = 0; - } - - /* Limit on total predictive coding gain */ - if( psEnc->sCmn.first_frame_after_reset ) { - minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET; - } else { - minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) / MAX_PREDICTION_POWER_GAIN; - minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality; - } - - /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */ - silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain ); - - /* Quantize LSFs */ - silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 ); - - /* Calculate residual energy using quantized LPC coefficients */ - silk_residual_energy_FLP( psEncCtrl->ResNrg, LPC_in_pre, psEncCtrl->PredCoef, psEncCtrl->Gains, - psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); - - /* Copy to prediction struct for use in next frame for interpolation */ - silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); -} - diff --git a/thirdparty/opus/silk/float/inner_product_FLP.c b/thirdparty/opus/silk/float/inner_product_FLP.c deleted file mode 100644 index 029c012911..0000000000 --- a/thirdparty/opus/silk/float/inner_product_FLP.c +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* inner product of two silk_float arrays, with result as double */ -double silk_inner_product_FLP( - const silk_float *data1, - const silk_float *data2, - opus_int dataSize -) -{ - opus_int i, dataSize4; - double result; - - /* 4x unrolled loop */ - result = 0.0; - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - result += data1[ i + 0 ] * (double)data2[ i + 0 ] + - data1[ i + 1 ] * (double)data2[ i + 1 ] + - data1[ i + 2 ] * (double)data2[ i + 2 ] + - data1[ i + 3 ] * (double)data2[ i + 3 ]; - } - - /* add any remaining products */ - for( ; i < dataSize; i++ ) { - result += data1[ i ] * (double)data2[ i ]; - } - - return result; -} diff --git a/thirdparty/opus/silk/float/k2a_FLP.c b/thirdparty/opus/silk/float/k2a_FLP.c deleted file mode 100644 index 12af4e7669..0000000000 --- a/thirdparty/opus/silk/float/k2a_FLP.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a_FLP( - silk_float *A, /* O prediction coefficients [order] */ - const silk_float *rc, /* I reflection coefficients [order] */ - opus_int32 order /* I prediction order */ -) -{ - opus_int k, n; - silk_float Atmp[ SILK_MAX_ORDER_LPC ]; - - for( k = 0; k < order; k++ ) { - for( n = 0; n < k; n++ ) { - Atmp[ n ] = A[ n ]; - } - for( n = 0; n < k; n++ ) { - A[ n ] += Atmp[ k - n - 1 ] * rc[ k ]; - } - A[ k ] = -rc[ k ]; - } -} diff --git a/thirdparty/opus/silk/float/levinsondurbin_FLP.c b/thirdparty/opus/silk/float/levinsondurbin_FLP.c deleted file mode 100644 index f0ba606981..0000000000 --- a/thirdparty/opus/silk/float/levinsondurbin_FLP.c +++ /dev/null @@ -1,81 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* Solve the normal equations using the Levinson-Durbin recursion */ -silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ - silk_float A[], /* O prediction coefficients [order] */ - const silk_float corr[], /* I input auto-correlations [order + 1] */ - const opus_int order /* I prediction order */ -) -{ - opus_int i, mHalf, m; - silk_float min_nrg, nrg, t, km, Atmp1, Atmp2; - - min_nrg = 1e-12f * corr[ 0 ] + 1e-9f; - nrg = corr[ 0 ]; - nrg = silk_max_float(min_nrg, nrg); - A[ 0 ] = corr[ 1 ] / nrg; - nrg -= A[ 0 ] * corr[ 1 ]; - nrg = silk_max_float(min_nrg, nrg); - - for( m = 1; m < order; m++ ) - { - t = corr[ m + 1 ]; - for( i = 0; i < m; i++ ) { - t -= A[ i ] * corr[ m - i ]; - } - - /* reflection coefficient */ - km = t / nrg; - - /* residual energy */ - nrg -= km * t; - nrg = silk_max_float(min_nrg, nrg); - - mHalf = m >> 1; - for( i = 0; i < mHalf; i++ ) { - Atmp1 = A[ i ]; - Atmp2 = A[ m - i - 1 ]; - A[ m - i - 1 ] -= km * Atmp1; - A[ i ] -= km * Atmp2; - } - if( m & 1 ) { - A[ mHalf ] -= km * A[ mHalf ]; - } - A[ m ] = km; - } - - /* return the residual energy */ - return nrg; -} - diff --git a/thirdparty/opus/silk/float/main_FLP.h b/thirdparty/opus/silk/float/main_FLP.h deleted file mode 100644 index e5a75972e5..0000000000 --- a/thirdparty/opus/silk/float/main_FLP.h +++ /dev/null @@ -1,313 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MAIN_FLP_H -#define SILK_MAIN_FLP_H - -#include "SigProc_FLP.h" -#include "SigProc_FIX.h" -#include "structs_FLP.h" -#include "main.h" -#include "define.h" -#include "debug.h" -#include "entenc.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define silk_encoder_state_Fxx silk_encoder_state_FLP -#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FLP -#define silk_encode_frame_Fxx silk_encode_frame_FLP - -/*********************/ -/* Encoder Functions */ -/*********************/ - -/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ -void silk_HP_variable_cutoff( - silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ -); - -/* Encoder main function */ -void silk_encode_do_VAD_FLP( - silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ -); - -/* Encoder main function */ -opus_int silk_encode_frame_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - opus_int32 *pnBytesOut, /* O Number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -); - -/* Initializes the Silk encoder state */ -opus_int silk_init_encoder( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - int arch /* I Run-tim architecture */ -); - -/* Control the Silk encoder */ -opus_int silk_control_encoder( - silk_encoder_state_FLP *psEnc, /* I/O Pointer to Silk encoder state FLP */ - silk_EncControlStruct *encControl, /* I Control structure */ - const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ - const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ - const opus_int channelNb, /* I Channel number */ - const opus_int force_fs_kHz -); - -/****************/ -/* Prefiltering */ -/****************/ -void silk_prefilter_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ - silk_float xw[], /* O Weighted signal */ - const silk_float x[] /* I Speech signal */ -); - -/**************************/ -/* Noise shaping analysis */ -/**************************/ -/* Compute noise shaping coefficients and initial gain values */ -void silk_noise_shape_analysis_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float *pitch_res, /* I LPC residual from pitch analysis */ - const silk_float *x /* I Input signal [frame_length + la_shape] */ -); - -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FLP( - silk_float *corr, /* O Result [order + 1] */ - const silk_float *input, /* I Input data to correlate */ - const silk_float warping, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -); - -/* Calculation of LTP state scaling */ -void silk_LTP_scale_ctrl_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/**********************************************/ -/* Prediction Analysis */ -/**********************************************/ -/* Find pitch lags */ -void silk_find_pitch_lags_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - silk_float res[], /* O Residual */ - const silk_float x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -); - -/* Find LPC and LTP coefficients */ -void silk_find_pred_coefs_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float res_pitch[], /* I Residual from pitch analysis */ - const silk_float x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* LPC analysis */ -void silk_find_LPC_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const silk_float x[], /* I Input signal */ - const silk_float minInvGain /* I Prediction gain from LTP (dB) */ -); - -/* LTP analysis */ -void silk_find_LTP_FLP( - silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - silk_float *LTPredCodGain, /* O LTP coding gain */ - const silk_float r_lpc[], /* I LPC residual */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset /* I Number of samples in LTP memory */ -); - -void silk_LTP_analysis_filter_FLP( - silk_float *LTP_res, /* O LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */ - const silk_float *x, /* I Input signal, with preceding samples */ - const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int pre_length /* I Preceding samples for each subframe */ -); - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FLP( - silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - const silk_float x[], /* I Input signal */ - silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const silk_float gains[], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int LPC_order /* I LPC order */ -); - -/* 16th order LPC analysis filter */ -void silk_LPC_analysis_filter_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length, /* I Length of input signal */ - const opus_int Order /* I LPC order */ -); - -/* LTP tap quantizer */ -void silk_quant_LTP_gains_FLP( - silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ - opus_int8 *periodicity_index, /* O Periodicity index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ - const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ - const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -); - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */ - const silk_float *c, /* I Filter coefficients */ - silk_float *wXX, /* I/O Weighted correlation matrix, reg. out */ - const silk_float *wXx, /* I Weighted correlation vector */ - const silk_float wxx, /* I Weighted correlation value */ - const opus_int D /* I Dimension */ -); - -/* Processing of gains */ -void silk_process_gains_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/******************/ -/* Linear Algebra */ -/******************/ -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FLP( - const silk_float *x, /* I x vector [ L+order-1 ] used to create X */ - const opus_int L, /* I Length of vectors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *XX /* O X'*X correlation matrix [order x order] */ -); - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FLP( - const silk_float *x, /* I x vector [L+order-1] used to create X */ - const silk_float *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vecors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *Xt /* O X'*t correlation vector [order] */ -); - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FLP( - silk_float *XX, /* I/O Correlation matrices */ - silk_float *xx, /* I/O Correlation values */ - const silk_float noise, /* I Noise energy to add */ - const opus_int D /* I Dimension of XX */ -); - -/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */ -void silk_solve_LDL_FLP( - silk_float *A, /* I/O Symmetric square matrix, out: reg. */ - const opus_int M, /* I Size of matrix */ - const silk_float *b, /* I Pointer to b vector */ - silk_float *x /* O Pointer to x solution vector */ -); - -/* Apply sine window to signal vector. */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -void silk_apply_sine_window_FLP( - silk_float px_win[], /* O Pointer to windowed signal */ - const silk_float px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -); - -/* Wrapper functions. Call flp / fix code */ - -/* Convert AR filter coefficients to NLSF parameters */ -void silk_A2NLSF_FLP( - opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */ - const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -); - -/* Convert NLSF parameters to AR prediction filter coefficients */ -void silk_NLSF2A_FLP( - silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ - const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -); - -/* Limit, stabilize, and quantize NLSFs */ -void silk_process_NLSFs_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -); - -/* Floating-point Silk NSQ wrapper */ -void silk_NSQ_wrapper_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - SideInfoIndices *psIndices, /* I/O Quantization indices */ - silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantzation state */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const silk_float x[] /* I Prefiltered input signal */ -); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c b/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c deleted file mode 100644 index 65f6ea5870..0000000000 --- a/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c +++ /dev/null @@ -1,365 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ -/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ -/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ -/* coefficient in an array of coefficients, for monic filters. */ -static OPUS_INLINE silk_float warped_gain( - const silk_float *coefs, - silk_float lambda, - opus_int order -) { - opus_int i; - silk_float gain; - - lambda = -lambda; - gain = coefs[ order - 1 ]; - for( i = order - 2; i >= 0; i-- ) { - gain = lambda * gain + coefs[ i ]; - } - return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) ); -} - -/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ -/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ -static OPUS_INLINE void warped_true2monic_coefs( - silk_float *coefs_syn, - silk_float *coefs_ana, - silk_float lambda, - silk_float limit, - opus_int order -) { - opus_int i, iter, ind = 0; - silk_float tmp, maxabs, chirp, gain_syn, gain_ana; - - /* Convert to monic coefficients */ - for( i = order - 1; i > 0; i-- ) { - coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; - coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; - } - gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); - gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); - for( i = 0; i < order; i++ ) { - coefs_syn[ i ] *= gain_syn; - coefs_ana[ i ] *= gain_ana; - } - - /* Limit */ - for( iter = 0; iter < 10; iter++ ) { - /* Find maximum absolute value */ - maxabs = -1.0f; - for( i = 0; i < order; i++ ) { - tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) ); - if( tmp > maxabs ) { - maxabs = tmp; - ind = i; - } - } - if( maxabs <= limit ) { - /* Coefficients are within range - done */ - return; - } - - /* Convert back to true warped coefficients */ - for( i = 1; i < order; i++ ) { - coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ]; - coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ]; - } - gain_syn = 1.0f / gain_syn; - gain_ana = 1.0f / gain_ana; - for( i = 0; i < order; i++ ) { - coefs_syn[ i ] *= gain_syn; - coefs_ana[ i ] *= gain_ana; - } - - /* Apply bandwidth expansion */ - chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) ); - silk_bwexpander_FLP( coefs_syn, order, chirp ); - silk_bwexpander_FLP( coefs_ana, order, chirp ); - - /* Convert to monic warped coefficients */ - for( i = order - 1; i > 0; i-- ) { - coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; - coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; - } - gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); - gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); - for( i = 0; i < order; i++ ) { - coefs_syn[ i ] *= gain_syn; - coefs_ana[ i ] *= gain_ana; - } - } - silk_assert( 0 ); -} - -/* Compute noise shaping coefficients and initial gain values */ -void silk_noise_shape_analysis_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float *pitch_res, /* I LPC residual from pitch analysis */ - const silk_float *x /* I Input signal [frame_length + la_shape] */ -) -{ - silk_shape_state_FLP *psShapeSt = &psEnc->sShape; - opus_int k, nSamples; - silk_float SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt; - silk_float nrg, pre_nrg, log_energy, log_energy_prev, energy_variation; - silk_float delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping; - silk_float x_windowed[ SHAPE_LPC_WIN_MAX ]; - silk_float auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - const silk_float *x_ptr, *pitch_res_ptr; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ); - - /* Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f ); - - /* Coding quality level, between 0.0 and 1.0 */ - psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) ); - - if( psEnc->sCmn.useCBR == 0 ) { - /* Reduce coding SNR during low speech activity */ - b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b; - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr; - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) */ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness = 0.0f; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = 2 * psEnc->sCmn.fs_kHz; - energy_variation = 0.0f; - log_energy_prev = 0.0f; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples ); - log_energy = silk_log2( nrg ); - if( k > 0 ) { - energy_variation += silk_abs_float( log_energy - log_energy_prev ); - } - log_energy_prev = log_energy; - pitch_res_ptr += nSamples; - } - psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain; /* between 0.0 and 1.0 */ - BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength ); - delta = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality ); - BWExp1 -= delta; - BWExp2 += delta; - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1 /= BWExp2; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality; - } else { - warping = 0.0f; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2; - - silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) ); - shift += flat_part; - silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping, - psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Calculate regular auto correlation */ - silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION; - - /* Convert correlations to prediction coefficients, and compute residual energy */ - nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder ); - psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder ); - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( - &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], - &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], - psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); - nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); - psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], - warping, 3.999f, psEnc->sCmn.shapingLPCOrder ); - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity */ - gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB ); - gain_add = (silk_float)pow( 2.0f, 0.16f * MIN_QGAIN_DB ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] *= gain_mult; - psEncCtrl->Gains[ k ] += gain_add; - } - - gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT; - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre[ k ] *= gain_mult; - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - /************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) ); - strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ]; - psEncCtrl->LF_MA_shp[ k ] = -1.0f + b; - psEncCtrl->LF_AR_shp[ k ] = 1.0f - b - b * strength; - } - Tilt = - HP_NOISE_COEF - - (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - } else { - b = 1.3f / psEnc->sCmn.fs_kHz; - psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b; - psEncCtrl->LF_AR_shp[ 0 ] = 1.0f - b - b * strength * 0.6f; - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ]; - psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ]; - } - Tilt = -HP_NOISE_COEF; - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr; - - /* More harmonic boost for noisy input signals */ - HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Harmonic noise shaping */ - HarmShapeGain = HARMONIC_SHAPING; - - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING * - ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr ); - } else { - HarmShapeGain = 0.0f; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psShapeSt->HarmBoost_smth += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth ); - psEncCtrl->HarmBoost[ k ] = psShapeSt->HarmBoost_smth; - psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth ); - psEncCtrl->HarmShapeGain[ k ] = psShapeSt->HarmShapeGain_smth; - psShapeSt->Tilt_smth += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth ); - psEncCtrl->Tilt[ k ] = psShapeSt->Tilt_smth; - } -} diff --git a/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c b/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c deleted file mode 100644 index d0e637a29d..0000000000 --- a/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c +++ /dev/null @@ -1,630 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/***************************************************************************** -* Pitch analyser function -******************************************************************************/ -#include "SigProc_FLP.h" -#include "SigProc_FIX.h" -#include "pitch_est_defines.h" -#include "pitch.h" - -#define SCRATCH_SIZE 22 - -/************************************************************/ -/* Internally used functions */ -/************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -); - -static void silk_P_Ana_calc_energy_st3( - silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ -); - -/************************************************************/ -/* CORE PITCH ANALYSIS FUNCTION */ -/************************************************************/ -opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ - const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I Number of 5 ms subframes */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, k, d, j; - silk_float frame_8kHz[ PE_MAX_FRAME_LENGTH_MS * 8 ]; - silk_float frame_4kHz[ PE_MAX_FRAME_LENGTH_MS * 4 ]; - opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ]; - opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ]; - opus_int32 filt_state[ 6 ]; - silk_float threshold, contour_bias; - silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ]; - opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ]; - silk_float CC[ PE_NB_CBKS_STAGE2_EXT ]; - const silk_float *target_ptr, *basis_ptr; - double cross_corr, normalizer, energy, energy_tmp; - opus_int d_srch[ PE_D_SRCH_LENGTH ]; - opus_int16 d_comp[ (PE_MAX_LAG >> 1) + 5 ]; - opus_int length_d_srch, length_d_comp; - silk_float Cmax, CCmax, CCmax_b, CCmax_new_b, CCmax_new; - opus_int CBimax, CBimax_new, lag, start_lag, end_lag, lag_new; - opus_int cbk_size; - silk_float lag_log2, prevLag_log2, delta_lag_log2_sqr; - silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ]; - silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ]; - opus_int lag_counter; - opus_int frame_length, frame_length_8kHz, frame_length_4kHz; - opus_int sf_length, sf_length_8kHz, sf_length_4kHz; - opus_int min_lag, min_lag_8kHz, min_lag_4kHz; - opus_int max_lag, max_lag_8kHz, max_lag_4kHz; - opus_int nb_cbk_search; - const opus_int8 *Lag_CB_ptr; - - /* Check for valid sampling frequency */ - silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); - - /* Check for valid complexity setting */ - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f ); - silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f ); - - /* Set up frame lengths max / min lag for the sampling frequency */ - frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; - frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; - frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; - sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; - sf_length_4kHz = PE_SUBFR_LENGTH_MS * 4; - sf_length_8kHz = PE_SUBFR_LENGTH_MS * 8; - min_lag = PE_MIN_LAG_MS * Fs_kHz; - min_lag_4kHz = PE_MIN_LAG_MS * 4; - min_lag_8kHz = PE_MIN_LAG_MS * 8; - max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; - max_lag_4kHz = PE_MAX_LAG_MS * 4; - max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1; - - /* Resample from input sampled at Fs_kHz to 8 kHz */ - if( Fs_kHz == 16 ) { - /* Resample to 16 -> 8 khz */ - opus_int16 frame_16_FIX[ 16 * PE_MAX_FRAME_LENGTH_MS ]; - silk_float2short_array( frame_16_FIX, frame, frame_length ); - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_8_FIX, frame_16_FIX, frame_length ); - silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); - } else if( Fs_kHz == 12 ) { - /* Resample to 12 -> 8 khz */ - opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ]; - silk_float2short_array( frame_12_FIX, frame, frame_length ); - silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); - silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length ); - silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); - } else { - silk_assert( Fs_kHz == 8 ); - silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz ); - } - - /* Decimate again to 4 kHz */ - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz ); - silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz ); - - /* Low-pass filter */ - for( i = frame_length_4kHz - 1; i > 0; i-- ) { - frame_4kHz[ i ] += frame_4kHz[ i - 1 ]; - } - - /****************************************************************************** - * FIRST STAGE, operating in 4 khz - ******************************************************************************/ - silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); - target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ]; - for( k = 0; k < nb_subfr >> 1; k++ ) { - /* Check that we are within range of the array */ - silk_assert( target_ptr >= frame_4kHz ); - silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - - basis_ptr = target_ptr - min_lag_4kHz; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - - celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch ); - - /* Calculate first vector products before loop */ - cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; - normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + - silk_energy_FLP( basis_ptr, sf_length_8kHz ) + - sf_length_8kHz * 4000.0f; - - C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); - - /* From now on normalizer is computed recursively */ - for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) { - basis_ptr--; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - - cross_corr = xcorr[ max_lag_4kHz - d ]; - - /* Add contribution of new sample and remove contribution from oldest sample */ - normalizer += - basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] - - basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ]; - C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer ); - } - /* Update target pointer */ - target_ptr += sf_length_8kHz; - } - - /* Apply short-lag bias */ - for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) { - C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f; - } - - /* Sort */ - length_d_srch = 4 + 2 * complexity; - silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); - silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch ); - - /* Escape if correlation is very low already here */ - Cmax = C[ 0 ][ min_lag_4kHz ]; - if( Cmax < 0.2f ) { - silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); - *LTPCorr = 0.0f; - *lagIndex = 0; - *contourIndex = 0; - return 1; - } - - threshold = search_thres1 * Cmax; - for( i = 0; i < length_d_srch; i++ ) { - /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ - if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) { - d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 ); - } else { - length_d_srch = i; - break; - } - } - silk_assert( length_d_srch > 0 ); - - for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) { - d_comp[ i ] = 0; - } - for( i = 0; i < length_d_srch; i++ ) { - d_comp[ d_srch[ i ] ] = 1; - } - - /* Convolution */ - for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) { - d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ]; - } - - length_d_srch = 0; - for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) { - if( d_comp[ i + 1 ] > 0 ) { - d_srch[ length_d_srch ] = i; - length_d_srch++; - } - } - - /* Convolution */ - for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) { - d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ]; - } - - length_d_comp = 0; - for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) { - if( d_comp[ i ] > 0 ) { - d_comp[ length_d_comp ] = (opus_int16)( i - 2 ); - length_d_comp++; - } - } - - /********************************************************************************** - ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation - *************************************************************************************/ - /********************************************************************************* - * Find energy of each subframe projected onto its history, for a range of delays - *********************************************************************************/ - silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float)); - - if( Fs_kHz == 8 ) { - target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ]; - } else { - target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; - } - for( k = 0; k < nb_subfr; k++ ) { - energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0; - for( j = 0; j < length_d_comp; j++ ) { - d = d_comp[ j ]; - basis_ptr = target_ptr - d; - cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz ); - if( cross_corr > 0.0f ) { - energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); - C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) ); - } else { - C[ k ][ d ] = 0.0f; - } - } - target_ptr += sf_length_8kHz; - } - - /* search over lag range and lags codebook */ - /* scale factor for lag codebook, as a function of center lag */ - - CCmax = 0.0f; /* This value doesn't matter */ - CCmax_b = -1000.0f; - - CBimax = 0; /* To avoid returning undefined lag values */ - lag = -1; /* To check if lag with strong enough correlation has been found */ - - if( prevLag > 0 ) { - if( Fs_kHz == 12 ) { - prevLag = silk_LSHIFT( prevLag, 1 ) / 3; - } else if( Fs_kHz == 16 ) { - prevLag = silk_RSHIFT( prevLag, 1 ); - } - prevLag_log2 = silk_log2( (silk_float)prevLag ); - } else { - prevLag_log2 = 0; - } - - /* Set up stage 2 codebook based on number of subframes */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - cbk_size = PE_NB_CBKS_STAGE2_EXT; - Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; - if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { - /* If input is 8 khz use a larger codebook here because it is last stage */ - nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE2; - } - } else { - cbk_size = PE_NB_CBKS_STAGE2_10MS; - Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; - } - - for( k = 0; k < length_d_srch; k++ ) { - d = d_srch[ k ]; - for( j = 0; j < nb_cbk_search; j++ ) { - CC[j] = 0.0f; - for( i = 0; i < nb_subfr; i++ ) { - /* Try all codebooks */ - CC[ j ] += C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )]; - } - } - /* Find best codebook */ - CCmax_new = -1000.0f; - CBimax_new = 0; - for( i = 0; i < nb_cbk_search; i++ ) { - if( CC[ i ] > CCmax_new ) { - CCmax_new = CC[ i ]; - CBimax_new = i; - } - } - - /* Bias towards shorter lags */ - lag_log2 = silk_log2( (silk_float)d ); - CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2; - - /* Bias towards previous lag */ - if( prevLag > 0 ) { - delta_lag_log2_sqr = lag_log2 - prevLag_log2; - delta_lag_log2_sqr *= delta_lag_log2_sqr; - CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f ); - } - - if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ - CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */ - ) { - CCmax_b = CCmax_new_b; - CCmax = CCmax_new; - lag = d; - CBimax = CBimax_new; - } - } - - if( lag == -1 ) { - /* No suitable candidate found */ - silk_memset( pitch_out, 0, PE_MAX_NB_SUBFR * sizeof(opus_int) ); - *LTPCorr = 0.0f; - *lagIndex = 0; - *contourIndex = 0; - return 1; - } - - /* Output normalized correlation */ - *LTPCorr = (silk_float)( CCmax / nb_subfr ); - silk_assert( *LTPCorr >= 0.0f ); - - if( Fs_kHz > 8 ) { - /* Search in original signal */ - - /* Compensate for decimation */ - silk_assert( lag == silk_SAT16( lag ) ); - if( Fs_kHz == 12 ) { - lag = silk_RSHIFT_ROUND( silk_SMULBB( lag, 3 ), 1 ); - } else { /* Fs_kHz == 16 */ - lag = silk_LSHIFT( lag, 1 ); - } - - lag = silk_LIMIT_int( lag, min_lag, max_lag ); - start_lag = silk_max_int( lag - 2, min_lag ); - end_lag = silk_min_int( lag + 2, max_lag ); - lag_new = lag; /* to avoid undefined lag */ - CBimax = 0; /* to avoid undefined lag */ - - CCmax = -1000.0f; - - /* Calculate the correlations and energies needed in stage 3 */ - silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity ); - - lag_counter = 0; - silk_assert( lag == silk_SAT16( lag ) ); - contour_bias = PE_FLATCONTOUR_BIAS / lag; - - /* Set up cbk parameters according to complexity setting and frame length */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - } - - target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; - energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0; - for( d = start_lag; d <= end_lag; d++ ) { - for( j = 0; j < nb_cbk_search; j++ ) { - cross_corr = 0.0; - energy = energy_tmp; - for( k = 0; k < nb_subfr; k++ ) { - cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ]; - energy += energies_st3[ k ][ j ][ lag_counter ]; - } - if( cross_corr > 0.0 ) { - CCmax_new = (silk_float)( 2 * cross_corr / energy ); - /* Reduce depending on flatness of contour */ - CCmax_new *= 1.0f - contour_bias * j; - } else { - CCmax_new = 0.0f; - } - - if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { - CCmax = CCmax_new; - lag_new = d; - CBimax = j; - } - } - lag_counter++; - } - - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); - } - *lagIndex = (opus_int16)( lag_new - min_lag ); - *contourIndex = (opus_int8)CBimax; - } else { /* Fs_kHz == 8 */ - /* Save Lags */ - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 ); - } - *lagIndex = (opus_int16)( lag - min_lag_8kHz ); - *contourIndex = (opus_int8)CBimax; - } - silk_assert( *lagIndex >= 0 ); - /* return as voiced */ - return 0; -} - -/*********************************************************************** - * Calculates the correlations used in stage 3 search. In order to cover - * the whole lag codebook for all the searched offset lags (lag +- 2), - * the following correlations are needed in each sub frame: - * - * sf1: lag range [-8,...,7] total 16 correlations - * sf2: lag range [-4,...,4] total 9 correlations - * sf3: lag range [-3,....4] total 8 correltions - * sf4: lag range [-6,....8] total 15 correlations - * - * In total 48 correlations. The direct implementation computed in worst - * case 4*12*5 = 240 correlations, but more likely around 120. - ***********************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -) -{ - const silk_float *target_ptr; - opus_int i, j, k, lag_counter, lag_low, lag_high; - opus_int nb_cbk_search, delta, idx, cbk_size; - silk_float scratch_mem[ SCRATCH_SIZE ]; - opus_val32 xcorr[ SCRATCH_SIZE ]; - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the correlations for each subframe */ - lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); - silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch ); - for( j = lag_low; j <= lag_high; j++ ) { - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = xcorr[ lag_high - j ]; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ]; - } - } - target_ptr += sf_length; - } -} - -/********************************************************************/ -/* Calculate the energies for first two subframes. The energies are */ -/* calculated recursively. */ -/********************************************************************/ -static void silk_P_Ana_calc_energy_st3( - silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ -) -{ - const silk_float *target_ptr, *basis_ptr; - double energy; - opus_int k, i, j, lag_counter; - opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; - silk_float scratch_mem[ SCRATCH_SIZE ]; - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the energy for first lag */ - basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); - energy = silk_energy_FLP( basis_ptr, sf_length ) + 1e-3; - silk_assert( energy >= 0.0 ); - scratch_mem[lag_counter] = (silk_float)energy; - lag_counter++; - - lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); - for( i = 1; i < lag_diff; i++ ) { - /* remove part outside new window */ - energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i]; - silk_assert( energy >= 0.0 ); - - /* add part that comes into window */ - energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ]; - silk_assert( energy >= 0.0 ); - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[lag_counter] = (silk_float)energy; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ]; - silk_assert( energies_st3[ k ][ i ][ j ] >= 0.0f ); - } - } - target_ptr += sf_length; - } -} diff --git a/thirdparty/opus/silk/float/prefilter_FLP.c b/thirdparty/opus/silk/float/prefilter_FLP.c deleted file mode 100644 index 8bc32fb410..0000000000 --- a/thirdparty/opus/silk/float/prefilter_FLP.c +++ /dev/null @@ -1,206 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* -* Prefilter for finding Quantizer input signal -*/ -static OPUS_INLINE void silk_prefilt_FLP( - silk_prefilter_state_FLP *P, /* I/O state */ - silk_float st_res[], /* I */ - silk_float xw[], /* O */ - silk_float *HarmShapeFIR, /* I */ - silk_float Tilt, /* I */ - silk_float LF_MA_shp, /* I */ - silk_float LF_AR_shp, /* I */ - opus_int lag, /* I */ - opus_int length /* I */ -); - -static void silk_warped_LPC_analysis_filter_FLP( - silk_float state[], /* I/O State [order + 1] */ - silk_float res[], /* O Residual signal [length] */ - const silk_float coef[], /* I Coefficients [order] */ - const silk_float input[], /* I Input signal [length] */ - const silk_float lambda, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - silk_float acc, tmp1, tmp2; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = state[ 0 ] + lambda * state[ 1 ]; - state[ 0 ] = input[ n ]; - /* Output of allpass section */ - tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 ); - state[ 1 ] = tmp2; - acc = coef[ 0 ] * tmp2; - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 ); - state[ i ] = tmp1; - acc += coef[ i - 1 ] * tmp1; - /* Output of allpass section */ - tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 ); - state[ i + 1 ] = tmp2; - acc += coef[ i ] * tmp2; - } - state[ order ] = tmp1; - acc += coef[ order - 1 ] * tmp1; - res[ n ] = input[ n ] - acc; - } -} - -/* -* silk_prefilter. Main prefilter function -*/ -void silk_prefilter_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ - silk_float xw[], /* O Weighted signal */ - const silk_float x[] /* I Speech signal */ -) -{ - silk_prefilter_state_FLP *P = &psEnc->sPrefilt; - opus_int j, k, lag; - silk_float HarmShapeGain, Tilt, LF_MA_shp, LF_AR_shp; - silk_float B[ 2 ]; - const silk_float *AR1_shp; - const silk_float *px; - silk_float *pxw; - silk_float HarmShapeFIR[ 3 ]; - silk_float st_res[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ]; - - /* Set up pointers */ - px = x; - pxw = xw; - lag = P->lagPrev; - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Update Variables that change per sub frame */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - lag = psEncCtrl->pitchL[ k ]; - } - - /* Noise shape parameters */ - HarmShapeGain = psEncCtrl->HarmShapeGain[ k ] * ( 1.0f - psEncCtrl->HarmBoost[ k ] ); - HarmShapeFIR[ 0 ] = 0.25f * HarmShapeGain; - HarmShapeFIR[ 1 ] = 32767.0f / 65536.0f * HarmShapeGain; - HarmShapeFIR[ 2 ] = 0.25f * HarmShapeGain; - Tilt = psEncCtrl->Tilt[ k ]; - LF_MA_shp = psEncCtrl->LF_MA_shp[ k ]; - LF_AR_shp = psEncCtrl->LF_AR_shp[ k ]; - AR1_shp = &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Short term FIR filtering */ - silk_warped_LPC_analysis_filter_FLP( P->sAR_shp, st_res, AR1_shp, px, - (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder ); - - /* Reduce (mainly) low frequencies during harmonic emphasis */ - B[ 0 ] = psEncCtrl->GainsPre[ k ]; - B[ 1 ] = -psEncCtrl->GainsPre[ k ] * - ( psEncCtrl->HarmBoost[ k ] * HarmShapeGain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT ); - pxw[ 0 ] = B[ 0 ] * st_res[ 0 ] + B[ 1 ] * P->sHarmHP; - for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { - pxw[ j ] = B[ 0 ] * st_res[ j ] + B[ 1 ] * st_res[ j - 1 ]; - } - P->sHarmHP = st_res[ psEnc->sCmn.subfr_length - 1 ]; - - silk_prefilt_FLP( P, pxw, pxw, HarmShapeFIR, Tilt, LF_MA_shp, LF_AR_shp, lag, psEnc->sCmn.subfr_length ); - - px += psEnc->sCmn.subfr_length; - pxw += psEnc->sCmn.subfr_length; - } - P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; -} - -/* -* Prefilter for finding Quantizer input signal -*/ -static OPUS_INLINE void silk_prefilt_FLP( - silk_prefilter_state_FLP *P, /* I/O state */ - silk_float st_res[], /* I */ - silk_float xw[], /* O */ - silk_float *HarmShapeFIR, /* I */ - silk_float Tilt, /* I */ - silk_float LF_MA_shp, /* I */ - silk_float LF_AR_shp, /* I */ - opus_int lag, /* I */ - opus_int length /* I */ -) -{ - opus_int i; - opus_int idx, LTP_shp_buf_idx; - silk_float n_Tilt, n_LF, n_LTP; - silk_float sLF_AR_shp, sLF_MA_shp; - silk_float *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp = P->sLF_AR_shp; - sLF_MA_shp = P->sLF_MA_shp; - - for( i = 0; i < length; i++ ) { - if( lag > 0 ) { - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP = LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ] * HarmShapeFIR[ 0 ]; - n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ] * HarmShapeFIR[ 1 ]; - n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ] * HarmShapeFIR[ 2 ]; - } else { - n_LTP = 0; - } - - n_Tilt = sLF_AR_shp * Tilt; - n_LF = sLF_AR_shp * LF_AR_shp + sLF_MA_shp * LF_MA_shp; - - sLF_AR_shp = st_res[ i ] - n_Tilt; - sLF_MA_shp = sLF_AR_shp - n_LF; - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = sLF_MA_shp; - - xw[ i ] = sLF_MA_shp - n_LTP; - } - /* Copy temp variable back to state */ - P->sLF_AR_shp = sLF_AR_shp; - P->sLF_MA_shp = sLF_MA_shp; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} diff --git a/thirdparty/opus/silk/float/process_gains_FLP.c b/thirdparty/opus/silk/float/process_gains_FLP.c deleted file mode 100644 index c0da0dae44..0000000000 --- a/thirdparty/opus/silk/float/process_gains_FLP.c +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* Processing of gains */ -void silk_process_gains_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - silk_shape_state_FLP *psShapeSt = &psEnc->sShape; - opus_int k; - opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; - silk_float s, InvMaxSqrVal, gain, quant_offset; - - /* Gain reduction when LTP coding gain is high */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - s = 1.0f - 0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] *= s; - } - } - - /* Limit the quantized signal */ - InvMaxSqrVal = ( silk_float )( pow( 2.0f, 0.33f * ( 21.0f - psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) ) ) / psEnc->sCmn.subfr_length ); - - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Soft limit on ratio residual energy and squared gains */ - gain = psEncCtrl->Gains[ k ]; - gain = ( silk_float )sqrt( gain * gain + psEncCtrl->ResNrg[ k ] * InvMaxSqrVal ); - psEncCtrl->Gains[ k ] = silk_min_float( gain, 32767.0f ); - } - - /* Prepare gains for noise shaping quantization */ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - pGains_Q16[ k ] = (opus_int32)( psEncCtrl->Gains[ k ] * 65536.0f ); - } - - /* Save unquantized gains and gain Index */ - silk_memcpy( psEncCtrl->GainsUnq_Q16, pGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex; - - /* Quantize gains */ - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, - &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] = pGains_Q16[ k ] / 65536.0f; - } - - /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - if( psEncCtrl->LTPredCodGain + psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ) > 1.0f ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - } - - /* Quantizer boundary adjustment */ - quant_offset = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ] / 1024.0f; - psEncCtrl->Lambda = LAMBDA_OFFSET - + LAMBDA_DELAYED_DECISIONS * psEnc->sCmn.nStatesDelayedDecision - + LAMBDA_SPEECH_ACT * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ) - + LAMBDA_INPUT_QUALITY * psEncCtrl->input_quality - + LAMBDA_CODING_QUALITY * psEncCtrl->coding_quality - + LAMBDA_QUANT_OFFSET * quant_offset; - - silk_assert( psEncCtrl->Lambda > 0.0f ); - silk_assert( psEncCtrl->Lambda < 2.0f ); -} diff --git a/thirdparty/opus/silk/float/regularize_correlations_FLP.c b/thirdparty/opus/silk/float/regularize_correlations_FLP.c deleted file mode 100644 index df4612604c..0000000000 --- a/thirdparty/opus/silk/float/regularize_correlations_FLP.c +++ /dev/null @@ -1,48 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FLP( - silk_float *XX, /* I/O Correlation matrices */ - silk_float *xx, /* I/O Correlation values */ - const silk_float noise, /* I Noise energy to add */ - const opus_int D /* I Dimension of XX */ -) -{ - opus_int i; - - for( i = 0; i < D; i++ ) { - matrix_ptr( &XX[ 0 ], i, i, D ) += noise; - } - xx[ 0 ] += noise; -} diff --git a/thirdparty/opus/silk/float/residual_energy_FLP.c b/thirdparty/opus/silk/float/residual_energy_FLP.c deleted file mode 100644 index b2e03a86a4..0000000000 --- a/thirdparty/opus/silk/float/residual_energy_FLP.c +++ /dev/null @@ -1,117 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -#define MAX_ITERATIONS_RESIDUAL_NRG 10 -#define REGULARIZATION_FACTOR 1e-8f - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */ - const silk_float *c, /* I Filter coefficients */ - silk_float *wXX, /* I/O Weighted correlation matrix, reg. out */ - const silk_float *wXx, /* I Weighted correlation vector */ - const silk_float wxx, /* I Weighted correlation value */ - const opus_int D /* I Dimension */ -) -{ - opus_int i, j, k; - silk_float tmp, nrg = 0.0f, regularization; - - /* Safety checks */ - silk_assert( D >= 0 ); - - regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] ); - for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) { - nrg = wxx; - - tmp = 0.0f; - for( i = 0; i < D; i++ ) { - tmp += wXx[ i ] * c[ i ]; - } - nrg -= 2.0f * tmp; - - /* compute c' * wXX * c, assuming wXX is symmetric */ - for( i = 0; i < D; i++ ) { - tmp = 0.0f; - for( j = i + 1; j < D; j++ ) { - tmp += matrix_c_ptr( wXX, i, j, D ) * c[ j ]; - } - nrg += c[ i ] * ( 2.0f * tmp + matrix_c_ptr( wXX, i, i, D ) * c[ i ] ); - } - if( nrg > 0 ) { - break; - } else { - /* Add white noise */ - for( i = 0; i < D; i++ ) { - matrix_c_ptr( wXX, i, i, D ) += regularization; - } - /* Increase noise for next run */ - regularization *= 2.0f; - } - } - if( k == MAX_ITERATIONS_RESIDUAL_NRG ) { - silk_assert( nrg == 0 ); - nrg = 1.0f; - } - - return nrg; -} - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FLP( - silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - const silk_float x[], /* I Input signal */ - silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const silk_float gains[], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int LPC_order /* I LPC order */ -) -{ - opus_int shift; - silk_float *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ]; - - LPC_res_ptr = LPC_res + LPC_order; - shift = LPC_order + subfr_length; - - /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ - silk_LPC_analysis_filter_FLP( LPC_res, a[ 0 ], x + 0 * shift, 2 * shift, LPC_order ); - nrgs[ 0 ] = ( silk_float )( gains[ 0 ] * gains[ 0 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) ); - nrgs[ 1 ] = ( silk_float )( gains[ 1 ] * gains[ 1 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) ); - - if( nb_subfr == MAX_NB_SUBFR ) { - silk_LPC_analysis_filter_FLP( LPC_res, a[ 1 ], x + 2 * shift, 2 * shift, LPC_order ); - nrgs[ 2 ] = ( silk_float )( gains[ 2 ] * gains[ 2 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) ); - nrgs[ 3 ] = ( silk_float )( gains[ 3 ] * gains[ 3 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) ); - } -} diff --git a/thirdparty/opus/silk/float/scale_copy_vector_FLP.c b/thirdparty/opus/silk/float/scale_copy_vector_FLP.c deleted file mode 100644 index 20db32b3b1..0000000000 --- a/thirdparty/opus/silk/float/scale_copy_vector_FLP.c +++ /dev/null @@ -1,57 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* copy and multiply a vector by a constant */ -void silk_scale_copy_vector_FLP( - silk_float *data_out, - const silk_float *data_in, - silk_float gain, - opus_int dataSize -) -{ - opus_int i, dataSize4; - - /* 4x unrolled loop */ - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - data_out[ i + 0 ] = gain * data_in[ i + 0 ]; - data_out[ i + 1 ] = gain * data_in[ i + 1 ]; - data_out[ i + 2 ] = gain * data_in[ i + 2 ]; - data_out[ i + 3 ] = gain * data_in[ i + 3 ]; - } - - /* any remaining elements */ - for( ; i < dataSize; i++ ) { - data_out[ i ] = gain * data_in[ i ]; - } -} diff --git a/thirdparty/opus/silk/float/scale_vector_FLP.c b/thirdparty/opus/silk/float/scale_vector_FLP.c deleted file mode 100644 index 108fdcbed5..0000000000 --- a/thirdparty/opus/silk/float/scale_vector_FLP.c +++ /dev/null @@ -1,56 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* multiply a vector by a constant */ -void silk_scale_vector_FLP( - silk_float *data1, - silk_float gain, - opus_int dataSize -) -{ - opus_int i, dataSize4; - - /* 4x unrolled loop */ - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - data1[ i + 0 ] *= gain; - data1[ i + 1 ] *= gain; - data1[ i + 2 ] *= gain; - data1[ i + 3 ] *= gain; - } - - /* any remaining elements */ - for( ; i < dataSize; i++ ) { - data1[ i ] *= gain; - } -} diff --git a/thirdparty/opus/silk/float/schur_FLP.c b/thirdparty/opus/silk/float/schur_FLP.c deleted file mode 100644 index ee436f8351..0000000000 --- a/thirdparty/opus/silk/float/schur_FLP.c +++ /dev/null @@ -1,70 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -silk_float silk_schur_FLP( /* O returns residual energy */ - silk_float refl_coef[], /* O reflection coefficients (length order) */ - const silk_float auto_corr[], /* I autocorrelation sequence (length order+1) */ - opus_int order /* I order */ -) -{ - opus_int k, n; - silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - silk_float Ctmp1, Ctmp2, rc_tmp; - - silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); - - /* Copy correlations */ - for( k = 0; k < order+1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ]; - } - - for( k = 0; k < order; k++ ) { - /* Get reflection coefficient */ - rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f ); - - /* Save the output */ - refl_coef[ k ] = rc_tmp; - - /* Update correlations */ - for( n = 0; n < order - k; n++ ) { - Ctmp1 = C[ n + k + 1 ][ 0 ]; - Ctmp2 = C[ n ][ 1 ]; - C[ n + k + 1 ][ 0 ] = Ctmp1 + Ctmp2 * rc_tmp; - C[ n ][ 1 ] = Ctmp2 + Ctmp1 * rc_tmp; - } - } - - /* Return residual energy */ - return C[ 0 ][ 1 ]; -} - diff --git a/thirdparty/opus/silk/float/solve_LS_FLP.c b/thirdparty/opus/silk/float/solve_LS_FLP.c deleted file mode 100644 index 7c90d665a0..0000000000 --- a/thirdparty/opus/silk/float/solve_LS_FLP.c +++ /dev/null @@ -1,207 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/********************************************************************** - * LDL Factorisation. Finds the upper triangular matrix L and the diagonal - * Matrix D (only the diagonal elements returned in a vector)such that - * the symmetric matric A is given by A = L*D*L'. - **********************************************************************/ -static OPUS_INLINE void silk_LDL_FLP( - silk_float *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ - silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ -); - -/********************************************************************** - * Function to solve linear equation Ax = b, when A is a MxM lower - * triangular matrix, with ones on the diagonal. - **********************************************************************/ -static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -); - -/********************************************************************** - * Function to solve linear equation (A^T)x = b, when A is a MxM lower - * triangular, with ones on the diagonal. (ie then A^T is upper triangular) - **********************************************************************/ -static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -); - -/********************************************************************** - * Function to solve linear equation Ax = b, when A is a MxM - * symmetric square matrix - using LDL factorisation - **********************************************************************/ -void silk_solve_LDL_FLP( - silk_float *A, /* I/O Symmetric square matrix, out: reg. */ - const opus_int M, /* I Size of matrix */ - const silk_float *b, /* I Pointer to b vector */ - silk_float *x /* O Pointer to x solution vector */ -) -{ - opus_int i; - silk_float L[ MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ]; - silk_float T[ MAX_MATRIX_SIZE ]; - silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/ - - silk_assert( M <= MAX_MATRIX_SIZE ); - - /*************************************************** - Factorize A by LDL such that A = L*D*(L^T), - where L is lower triangular with ones on diagonal - ****************************************************/ - silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv ); - - /**************************************************** - * substitute D*(L^T) = T. ie: - L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b - ******************************************************/ - silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T ); - - /**************************************************** - D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is - diagonal just multiply with 1/d_i - ****************************************************/ - for( i = 0; i < M; i++ ) { - T[ i ] = T[ i ] * Dinv[ i ]; - } - /**************************************************** - x = inv(L') * inv(D) * T - *****************************************************/ - silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x ); -} - -static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -) -{ - opus_int i, j; - silk_float temp; - const silk_float *ptr1; - - for( i = M - 1; i >= 0; i-- ) { - ptr1 = matrix_adr( L, 0, i, M ); - temp = 0; - for( j = M - 1; j > i ; j-- ) { - temp += ptr1[ j * M ] * x[ j ]; - } - temp = b[ i ] - temp; - x[ i ] = temp; - } -} - -static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -) -{ - opus_int i, j; - silk_float temp; - const silk_float *ptr1; - - for( i = 0; i < M; i++ ) { - ptr1 = matrix_adr( L, i, 0, M ); - temp = 0; - for( j = 0; j < i; j++ ) { - temp += ptr1[ j ] * x[ j ]; - } - temp = b[ i ] - temp; - x[ i ] = temp; - } -} - -static OPUS_INLINE void silk_LDL_FLP( - silk_float *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ - silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ -) -{ - opus_int i, j, k, loop_count, err = 1; - silk_float *ptr1, *ptr2; - double temp, diag_min_value; - silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/ - - silk_assert( M <= MAX_MATRIX_SIZE ); - - diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] ); - for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) { - err = 0; - for( j = 0; j < M; j++ ) { - ptr1 = matrix_adr( L, j, 0, M ); - temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/ - for( i = 0; i < j; i++ ) { - v[ i ] = ptr1[ i ] * D[ i ]; - temp -= ptr1[ i ] * v[ i ]; - } - if( temp < diag_min_value ) { - /* Badly conditioned matrix: add white noise and run again */ - temp = ( loop_count + 1 ) * diag_min_value - temp; - for( i = 0; i < M; i++ ) { - matrix_ptr( A, i, i, M ) += ( silk_float )temp; - } - err = 1; - break; - } - D[ j ] = ( silk_float )temp; - Dinv[ j ] = ( silk_float )( 1.0f / temp ); - matrix_ptr( L, j, j, M ) = 1.0f; - - ptr1 = matrix_adr( A, j, 0, M ); - ptr2 = matrix_adr( L, j + 1, 0, M); - for( i = j + 1; i < M; i++ ) { - temp = 0.0; - for( k = 0; k < j; k++ ) { - temp += ptr2[ k ] * v[ k ]; - } - matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] ); - ptr2 += M; /* go to next column*/ - } - } - } - silk_assert( err == 0 ); -} - diff --git a/thirdparty/opus/silk/float/sort_FLP.c b/thirdparty/opus/silk/float/sort_FLP.c deleted file mode 100644 index f08d7592c5..0000000000 --- a/thirdparty/opus/silk/float/sort_FLP.c +++ /dev/null @@ -1,83 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Insertion sort (fast for already almost sorted arrays): */ -/* Best case: O(n) for an already sorted array */ -/* Worst case: O(n^2) for an inversely sorted array */ - -#include "typedef.h" -#include "SigProc_FLP.h" - -void silk_insertion_sort_decreasing_FLP( - silk_float *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -) -{ - silk_float value; - opus_int i, j; - - /* Safety checks */ - silk_assert( K > 0 ); - silk_assert( L > 0 ); - silk_assert( L >= K ); - - /* Write start indices in index vector */ - for( i = 0; i < K; i++ ) { - idx[ i ] = i; - } - - /* Sort vector elements by value, decreasing order */ - for( i = 1; i < K; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - - /* If less than L values are asked check the remaining values, */ - /* but only spend CPU to ensure that the K first values are correct */ - for( i = K; i < L; i++ ) { - value = a[ i ]; - if( value > a[ K - 1 ] ) { - for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - } -} diff --git a/thirdparty/opus/silk/float/structs_FLP.h b/thirdparty/opus/silk/float/structs_FLP.h deleted file mode 100644 index 14d647ced2..0000000000 --- a/thirdparty/opus/silk/float/structs_FLP.h +++ /dev/null @@ -1,132 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_STRUCTS_FLP_H -#define SILK_STRUCTS_FLP_H - -#include "typedef.h" -#include "main.h" -#include "structs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/********************************/ -/* Noise shaping analysis state */ -/********************************/ -typedef struct { - opus_int8 LastGainIndex; - silk_float HarmBoost_smth; - silk_float HarmShapeGain_smth; - silk_float Tilt_smth; -} silk_shape_state_FLP; - -/********************************/ -/* Prefilter state */ -/********************************/ -typedef struct { - silk_float sLTP_shp[ LTP_BUF_LENGTH ]; - silk_float sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int sLTP_shp_buf_idx; - silk_float sLF_AR_shp; - silk_float sLF_MA_shp; - silk_float sHarmHP; - opus_int32 rand_seed; - opus_int lagPrev; -} silk_prefilter_state_FLP; - -/********************************/ -/* Encoder state FLP */ -/********************************/ -typedef struct { - silk_encoder_state sCmn; /* Common struct, shared with fixed-point code */ - silk_shape_state_FLP sShape; /* Noise shaping state */ - silk_prefilter_state_FLP sPrefilt; /* Prefilter State */ - - /* Buffer for find pitch and noise shape analysis */ - silk_float x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ - silk_float LTPCorr; /* Normalized correlation from pitch lag estimator */ -} silk_encoder_state_FLP; - -/************************/ -/* Encoder control FLP */ -/************************/ -typedef struct { - /* Prediction and coding parameters */ - silk_float Gains[ MAX_NB_SUBFR ]; - silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ]; /* holds interpolated and final coefficients */ - silk_float LTPCoef[LTP_ORDER * MAX_NB_SUBFR]; - silk_float LTP_scale; - opus_int pitchL[ MAX_NB_SUBFR ]; - - /* Noise shaping parameters */ - silk_float AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - silk_float AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - silk_float LF_MA_shp[ MAX_NB_SUBFR ]; - silk_float LF_AR_shp[ MAX_NB_SUBFR ]; - silk_float GainsPre[ MAX_NB_SUBFR ]; - silk_float HarmBoost[ MAX_NB_SUBFR ]; - silk_float Tilt[ MAX_NB_SUBFR ]; - silk_float HarmShapeGain[ MAX_NB_SUBFR ]; - silk_float Lambda; - silk_float input_quality; - silk_float coding_quality; - - /* Measures */ - silk_float sparseness; - silk_float predGain; - silk_float LTPredCodGain; - silk_float ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ - - /* Parameters for CBR mode */ - opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ]; - opus_int8 lastGainIndexPrev; -} silk_encoder_control_FLP; - -/************************/ -/* Encoder Super Struct */ -/************************/ -typedef struct { - silk_encoder_state_FLP state_Fxx[ ENCODER_NUM_CHANNELS ]; - stereo_enc_state sStereo; - opus_int32 nBitsUsedLBRR; - opus_int32 nBitsExceeded; - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int nPrevChannelsInternal; - opus_int timeSinceSwitchAllowed_ms; - opus_int allowBandwidthSwitch; - opus_int prev_decode_only_middle; -} silk_encoder; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c b/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c deleted file mode 100644 index 542414f48e..0000000000 --- a/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FLP( - silk_float *corr, /* O Result [order + 1] */ - const silk_float *input, /* I Input data to correlate */ - const silk_float warping, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - opus_int n, i; - double tmp1, tmp2; - double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; - double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - /* Loop over samples */ - for( n = 0; n < length; n++ ) { - tmp1 = input[ n ]; - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 ); - state[ i ] = tmp1; - C[ i ] += state[ 0 ] * tmp1; - /* Output of allpass section */ - tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 ); - state[ i + 1 ] = tmp2; - C[ i + 1 ] += state[ 0 ] * tmp2; - } - state[ order ] = tmp1; - C[ order ] += state[ 0 ] * tmp1; - } - - /* Copy correlations in silk_float output format */ - for( i = 0; i < order + 1; i++ ) { - corr[ i ] = ( silk_float )C[ i ]; - } -} diff --git a/thirdparty/opus/silk/float/wrappers_FLP.c b/thirdparty/opus/silk/float/wrappers_FLP.c deleted file mode 100644 index 6666b8efaa..0000000000 --- a/thirdparty/opus/silk/float/wrappers_FLP.c +++ /dev/null @@ -1,202 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Wrappers. Calls flp / fix code */ - -/* Convert AR filter coefficients to NLSF parameters */ -void silk_A2NLSF_FLP( - opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */ - const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -) -{ - opus_int i; - opus_int32 a_fix_Q16[ MAX_LPC_ORDER ]; - - for( i = 0; i < LPC_order; i++ ) { - a_fix_Q16[ i ] = silk_float2int( pAR[ i ] * 65536.0f ); - } - - silk_A2NLSF( NLSF_Q15, a_fix_Q16, LPC_order ); -} - -/* Convert LSF parameters to AR prediction filter coefficients */ -void silk_NLSF2A_FLP( - silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ - const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -) -{ - opus_int i; - opus_int16 a_fix_Q12[ MAX_LPC_ORDER ]; - - silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order ); - - for( i = 0; i < LPC_order; i++ ) { - pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f ); - } -} - -/******************************************/ -/* Floating-point NLSF processing wrapper */ -/******************************************/ -void silk_process_NLSFs_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -) -{ - opus_int i, j; - opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - - silk_process_NLSFs( psEncC, PredCoef_Q12, NLSF_Q15, prev_NLSF_Q15); - - for( j = 0; j < 2; j++ ) { - for( i = 0; i < psEncC->predictLPCOrder; i++ ) { - PredCoef[ j ][ i ] = ( silk_float )PredCoef_Q12[ j ][ i ] * ( 1.0f / 4096.0f ); - } - } -} - -/****************************************/ -/* Floating-point Silk NSQ wrapper */ -/****************************************/ -void silk_NSQ_wrapper_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - SideInfoIndices *psIndices, /* I/O Quantization indices */ - silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantzation state */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const silk_float x[] /* I Prefiltered input signal */ -) -{ - opus_int i, j; - opus_int32 x_Q3[ MAX_FRAME_LENGTH ]; - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; - opus_int LTP_scale_Q14; - - /* Noise shaping parameters */ - opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ - opus_int Lambda_Q10; - opus_int Tilt_Q14[ MAX_NB_SUBFR ]; - opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; - - /* Convert control struct to fix control struct */ - /* Noise shape parameters */ - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) { - AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - LF_shp_Q14[ i ] = silk_LSHIFT32( silk_float2int( psEncCtrl->LF_AR_shp[ i ] * 16384.0f ), 16 ) | - (opus_uint16)silk_float2int( psEncCtrl->LF_MA_shp[ i ] * 16384.0f ); - Tilt_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->Tilt[ i ] * 16384.0f ); - HarmShapeGain_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->HarmShapeGain[ i ] * 16384.0f ); - } - Lambda_Q10 = ( opus_int )silk_float2int( psEncCtrl->Lambda * 1024.0f ); - - /* prediction and coding parameters */ - for( i = 0; i < psEnc->sCmn.nb_subfr * LTP_ORDER; i++ ) { - LTPCoef_Q14[ i ] = (opus_int16)silk_float2int( psEncCtrl->LTPCoef[ i ] * 16384.0f ); - } - - for( j = 0; j < 2; j++ ) { - for( i = 0; i < psEnc->sCmn.predictLPCOrder; i++ ) { - PredCoef_Q12[ j ][ i ] = (opus_int16)silk_float2int( psEncCtrl->PredCoef[ j ][ i ] * 4096.0f ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - Gains_Q16[ i ] = silk_float2int( psEncCtrl->Gains[ i ] * 65536.0f ); - silk_assert( Gains_Q16[ i ] > 0 ); - } - - if( psIndices->signalType == TYPE_VOICED ) { - LTP_scale_Q14 = silk_LTPScales_table_Q14[ psIndices->LTP_scaleIndex ]; - } else { - LTP_scale_Q14 = 0; - } - - /* Convert input to fix */ - for( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] ); - } - - /* Call NSQ */ - if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); - } else { - silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); - } -} - -/***********************************************/ -/* Floating-point Silk LTP quantiation wrapper */ -/***********************************************/ -void silk_quant_LTP_gains_FLP( - silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ - opus_int8 *periodicity_index, /* O Periodicity index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ - const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ - const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -) -{ - opus_int i; - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ]; - opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ]; - - for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { - B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f ); - } - for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { - W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); - } - - silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch ); - - for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { - B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); - } -} diff --git a/thirdparty/opus/silk/gain_quant.c b/thirdparty/opus/silk/gain_quant.c deleted file mode 100644 index 64ccd0611b..0000000000 --- a/thirdparty/opus/silk/gain_quant.c +++ /dev/null @@ -1,141 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -#define OFFSET ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 ) -#define SCALE_Q16 ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) -#define INV_SCALE_Q16 ( ( 65536 * ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) / ( N_LEVELS_QGAIN - 1 ) ) - -/* Gain scalar quantization with hysteresis, uniform on log scale */ -void silk_gains_quant( - opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -) -{ - opus_int k, double_step_size_threshold; - - for( k = 0; k < nb_subfr; k++ ) { - /* Convert to log scale, scale, floor() */ - ind[ k ] = silk_SMULWB( SCALE_Q16, silk_lin2log( gain_Q16[ k ] ) - OFFSET ); - - /* Round towards previous quantized gain (hysteresis) */ - if( ind[ k ] < *prev_ind ) { - ind[ k ]++; - } - ind[ k ] = silk_LIMIT_int( ind[ k ], 0, N_LEVELS_QGAIN - 1 ); - - /* Compute delta indices and limit */ - if( k == 0 && conditional == 0 ) { - /* Full index */ - ind[ k ] = silk_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 ); - *prev_ind = ind[ k ]; - } else { - /* Delta index */ - ind[ k ] = ind[ k ] - *prev_ind; - - /* Double the quantization step size for large gain increases, so that the max gain level can be reached */ - double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; - if( ind[ k ] > double_step_size_threshold ) { - ind[ k ] = double_step_size_threshold + silk_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 ); - } - - ind[ k ] = silk_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT ); - - /* Accumulate deltas */ - if( ind[ k ] > double_step_size_threshold ) { - *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold; - } else { - *prev_ind += ind[ k ]; - } - - /* Shift to make non-negative */ - ind[ k ] -= MIN_DELTA_GAIN_QUANT; - } - - /* Scale and convert to linear scale */ - gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ - } -} - -/* Gains scalar dequantization, uniform on log scale */ -void silk_gains_dequant( - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -) -{ - opus_int k, ind_tmp, double_step_size_threshold; - - for( k = 0; k < nb_subfr; k++ ) { - if( k == 0 && conditional == 0 ) { - /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */ - *prev_ind = silk_max_int( ind[ k ], *prev_ind - 16 ); - } else { - /* Delta index */ - ind_tmp = ind[ k ] + MIN_DELTA_GAIN_QUANT; - - /* Accumulate deltas */ - double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; - if( ind_tmp > double_step_size_threshold ) { - *prev_ind += silk_LSHIFT( ind_tmp, 1 ) - double_step_size_threshold; - } else { - *prev_ind += ind_tmp; - } - } - *prev_ind = silk_LIMIT_int( *prev_ind, 0, N_LEVELS_QGAIN - 1 ); - - /* Scale and convert to linear scale */ - gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ - } -} - -/* Compute unique identifier of gain indices vector */ -opus_int32 silk_gains_ID( /* O returns unique identifier of gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - const opus_int nb_subfr /* I number of subframes */ -) -{ - opus_int k; - opus_int32 gainsID; - - gainsID = 0; - for( k = 0; k < nb_subfr; k++ ) { - gainsID = silk_ADD_LSHIFT32( ind[ k ], gainsID, 8 ); - } - - return gainsID; -} diff --git a/thirdparty/opus/silk/init_decoder.c b/thirdparty/opus/silk/init_decoder.c deleted file mode 100644 index f887c67886..0000000000 --- a/thirdparty/opus/silk/init_decoder.c +++ /dev/null @@ -1,56 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/************************/ -/* Init Decoder State */ -/************************/ -opus_int silk_init_decoder( - silk_decoder_state *psDec /* I/O Decoder state pointer */ -) -{ - /* Clear the entire encoder state, except anything copied */ - silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); - - /* Used to deactivate LSF interpolation */ - psDec->first_frame_after_reset = 1; - psDec->prev_gain_Q16 = 65536; - - /* Reset CNG state */ - silk_CNG_Reset( psDec ); - - /* Reset PLC state */ - silk_PLC_Reset( psDec ); - - return(0); -} - diff --git a/thirdparty/opus/silk/init_encoder.c b/thirdparty/opus/silk/init_encoder.c deleted file mode 100644 index 65995c33fa..0000000000 --- a/thirdparty/opus/silk/init_encoder.c +++ /dev/null @@ -1,64 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#ifdef FIXED_POINT -#include "main_FIX.h" -#else -#include "main_FLP.h" -#endif -#include "tuning_parameters.h" -#include "cpu_support.h" - -/*********************************/ -/* Initialize Silk Encoder state */ -/*********************************/ -opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ - int arch /* I Run-time architecture */ -) -{ - opus_int ret = 0; - - /* Clear the entire encoder state */ - silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) ); - - psEnc->sCmn.arch = arch; - - psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 ); - psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15; - - /* Used to deactivate LSF interpolation, pitch prediction */ - psEnc->sCmn.first_frame_after_reset = 1; - - /* Initialize Silk VAD */ - ret += silk_VAD_Init( &psEnc->sCmn.sVAD ); - - return ret; -} diff --git a/thirdparty/opus/silk/inner_prod_aligned.c b/thirdparty/opus/silk/inner_prod_aligned.c deleted file mode 100644 index 257ae9e04e..0000000000 --- a/thirdparty/opus/silk/inner_prod_aligned.c +++ /dev/null @@ -1,47 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -opus_int32 silk_inner_prod_aligned_scale( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int scale, /* I number of bits to shift */ - const opus_int len /* I vector lengths */ -) -{ - opus_int i; - opus_int32 sum = 0; - for( i = 0; i < len; i++ ) { - sum = silk_ADD_RSHIFT32( sum, silk_SMULBB( inVec1[ i ], inVec2[ i ] ), scale ); - } - return sum; -} diff --git a/thirdparty/opus/silk/interpolate.c b/thirdparty/opus/silk/interpolate.c deleted file mode 100644 index 1bd8ca4d53..0000000000 --- a/thirdparty/opus/silk/interpolate.c +++ /dev/null @@ -1,51 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Interpolate two vectors */ -void silk_interpolate( - opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */ - const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */ - const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */ - const opus_int ifact_Q2, /* I interp. factor, weight on 2nd vector */ - const opus_int d /* I number of parameters */ -) -{ - opus_int i; - - silk_assert( ifact_Q2 >= 0 ); - silk_assert( ifact_Q2 <= 4 ); - - for( i = 0; i < d; i++ ) { - xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 ); - } -} diff --git a/thirdparty/opus/silk/lin2log.c b/thirdparty/opus/silk/lin2log.c deleted file mode 100644 index d4fe515321..0000000000 --- a/thirdparty/opus/silk/lin2log.c +++ /dev/null @@ -1,46 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -/* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */ -/* Convert input to a log scale */ -opus_int32 silk_lin2log( - const opus_int32 inLin /* I input in linear scale */ -) -{ - opus_int32 lz, frac_Q7; - - silk_CLZ_FRAC( inLin, &lz, &frac_Q7 ); - - /* Piece-wise parabolic approximation */ - return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 ); -} - diff --git a/thirdparty/opus/silk/log2lin.c b/thirdparty/opus/silk/log2lin.c deleted file mode 100644 index b7c48e4740..0000000000 --- a/thirdparty/opus/silk/log2lin.c +++ /dev/null @@ -1,58 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Approximation of 2^() (very close inverse of silk_lin2log()) */ -/* Convert input to a linear scale */ -opus_int32 silk_log2lin( - const opus_int32 inLog_Q7 /* I input on log scale */ -) -{ - opus_int32 out, frac_Q7; - - if( inLog_Q7 < 0 ) { - return 0; - } else if ( inLog_Q7 >= 3967 ) { - return silk_int32_MAX; - } - - out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) ); - frac_Q7 = inLog_Q7 & 0x7F; - if( inLog_Q7 < 2048 ) { - /* Piece-wise parabolic approximation */ - out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 ); - } else { - /* Piece-wise parabolic approximation */ - out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ); - } - return out; -} diff --git a/thirdparty/opus/silk/macros.h b/thirdparty/opus/silk/macros.h deleted file mode 100644 index d3ca347520..0000000000 --- a/thirdparty/opus/silk/macros.h +++ /dev/null @@ -1,159 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_H -#define SILK_MACROS_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_types.h" -#include "opus_defines.h" -#include "arch.h" - -#if OPUS_GNUC_PREREQ(3, 0) -#define opus_likely(x) (__builtin_expect(!!(x), 1)) -#define opus_unlikely(x) (__builtin_expect(!!(x), 0)) -#else -#define opus_likely(x) (!!(x)) -#define opus_unlikely(x) (!!(x)) -#endif - -/* This is an OPUS_INLINE header file for general platform. */ - -/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ -#if OPUS_FAST_INT64 -#define silk_SMULWB(a32, b32) ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16)) -#else -#define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) -#endif - -/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ -#if OPUS_FAST_INT64 -#define silk_SMLAWB(a32, b32, c32) ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))) -#else -#define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) -#endif - -/* (a32 * (b32 >> 16)) >> 16 */ -#if OPUS_FAST_INT64 -#define silk_SMULWT(a32, b32) ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16)) -#else -#define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16)) -#endif - -/* a32 + (b32 * (c32 >> 16)) >> 16 */ -#if OPUS_FAST_INT64 -#define silk_SMLAWT(a32, b32, c32) ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))) -#else -#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16)) -#endif - -/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ -#define silk_SMULBB(a32, b32) ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32))) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ -#define silk_SMLABB(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) - -/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ -#define silk_SMULBT(a32, b32) ((opus_int32)((opus_int16)(a32)) * ((b32) >> 16)) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ -#define silk_SMLABT(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16)) - -/* a64 + (b32 * c32) */ -#define silk_SMLAL(a64, b32, c32) (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32)))) - -/* (a32 * b32) >> 16 */ -#if OPUS_FAST_INT64 -#define silk_SMULWW(a32, b32) ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16)) -#else -#define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)) -#endif - -/* a32 + ((b32 * c32) >> 16) */ -#if OPUS_FAST_INT64 -#define silk_SMLAWW(a32, b32, c32) ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16))) -#else -#define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)) -#endif - -/* add/subtract with output saturated */ -#define silk_ADD_SAT32(a, b) ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ? \ - ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) : \ - ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) ) - -#define silk_SUB_SAT32(a, b) ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ? \ - (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ - ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) - -#if defined(MIPSr1_ASM) -#include "mips/macros_mipsr1.h" -#endif - -#include "ecintrin.h" -#ifndef OVERRIDE_silk_CLZ16 -static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) -{ - return 32 - EC_ILOG(in16<<16|0x8000); -} -#endif - -#ifndef OVERRIDE_silk_CLZ32 -static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) -{ - return in32 ? 32 - EC_ILOG(in32) : 32; -} -#endif - -/* Row based */ -#define matrix_ptr(Matrix_base_adr, row, column, N) \ - (*((Matrix_base_adr) + ((row)*(N)+(column)))) -#define matrix_adr(Matrix_base_adr, row, column, N) \ - ((Matrix_base_adr) + ((row)*(N)+(column))) - -/* Column based */ -#ifndef matrix_c_ptr -# define matrix_c_ptr(Matrix_base_adr, row, column, M) \ - (*((Matrix_base_adr) + ((row)+(M)*(column)))) -#endif - -#ifdef OPUS_ARM_INLINE_ASM -#include "arm/macros_armv4.h" -#endif - -#ifdef OPUS_ARM_INLINE_EDSP -#include "arm/macros_armv5e.h" -#endif - -#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR -#include "arm/macros_arm64.h" -#endif - -#endif /* SILK_MACROS_H */ - diff --git a/thirdparty/opus/silk/main.h b/thirdparty/opus/silk/main.h deleted file mode 100644 index 2f90d68f7d..0000000000 --- a/thirdparty/opus/silk/main.h +++ /dev/null @@ -1,471 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MAIN_H -#define SILK_MAIN_H - -#include "SigProc_FIX.h" -#include "define.h" -#include "structs.h" -#include "tables.h" -#include "PLC.h" -#include "control.h" -#include "debug.h" -#include "entenc.h" -#include "entdec.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/main_sse.h" -#endif - -/* Convert Left/Right stereo signal to adaptive Mid/Side representation */ -void silk_stereo_LR_to_MS( - stereo_enc_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */ - opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */ - opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ - opus_int32 total_rate_bps, /* I Total bitrate */ - opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ - opus_int toMono, /* I Last frame before a stereo->mono transition */ - opus_int fs_kHz, /* I Sample rate (kHz) */ - opus_int frame_length /* I Number of samples */ -); - -/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ -void silk_stereo_MS_to_LR( - stereo_dec_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - const opus_int32 pred_Q13[], /* I Predictors */ - opus_int fs_kHz, /* I Samples rate (kHz) */ - opus_int frame_length /* I Number of samples */ -); - -/* Find least-squares prediction gain for one signal based on another and quantize it */ -opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */ - opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */ - const opus_int16 x[], /* I Basis signal */ - const opus_int16 y[], /* I Target signal */ - opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */ - opus_int length, /* I Number of samples */ - opus_int smooth_coef_Q16 /* I Smoothing coefficient */ -); - -/* Quantize mid/side predictors */ -void silk_stereo_quant_pred( - opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */ - opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */ -); - -/* Entropy code the mid/side quantization indices */ -void silk_stereo_encode_pred( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */ -); - -/* Entropy code the mid-only flag */ -void silk_stereo_encode_mid_only( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 mid_only_flag -); - -/* Decode mid/side predictors */ -void silk_stereo_decode_pred( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int32 pred_Q13[] /* O Predictors */ -); - -/* Decode mid-only flag */ -void silk_stereo_decode_mid_only( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int *decode_only_mid /* O Flag that only mid channel has been coded */ -); - -/* Encodes signs of excitation */ -void silk_encode_signs( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - const opus_int8 pulses[], /* I pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -); - -/* Decodes signs of excitation */ -void silk_decode_signs( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* I/O pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -); - -/* Check encoder control struct */ -opus_int check_control_input( - silk_EncControlStruct *encControl /* I Control structure */ -); - -/* Control internal sampling rate */ -opus_int silk_control_audio_bandwidth( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl /* I Control structure */ -); - -/* Control SNR of redidual quantizer */ -opus_int silk_control_SNR( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ -); - -/***************/ -/* Shell coder */ -/***************/ - -/* Encode quantization indices of excitation */ -void silk_encode_pulses( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I quantOffsetType */ - opus_int8 pulses[], /* I quantization indices */ - const opus_int frame_length /* I Frame length */ -); - -/* Shell encoder, operates on one shell code frame of 16 pulses */ -void silk_shell_encoder( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */ -); - -/* Shell decoder, operates on one shell code frame of 16 pulses */ -void silk_shell_decoder( - opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - const opus_int pulses4 /* I number of pulses per pulse-subframe */ -); - -/* Gain scalar quantization with hysteresis, uniform on log scale */ -void silk_gains_quant( - opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -); - -/* Gains scalar dequantization, uniform on log scale */ -void silk_gains_dequant( - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -); - -/* Compute unique identifier of gain indices vector */ -opus_int32 silk_gains_ID( /* O returns unique identifier of gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - const opus_int nb_subfr /* I number of subframes */ -); - -/* Interpolate two vectors */ -void silk_interpolate( - opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */ - const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */ - const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */ - const opus_int ifact_Q2, /* I interp. factor, weight on 2nd vector */ - const opus_int d /* I number of parameters */ -); - -/* LTP tap quantizer */ -void silk_quant_LTP_gains( - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ - opus_int8 *periodicity_index, /* O Periodicity Index */ - opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ - const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ - opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ - opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -); - -/* Entropy constrained matrix-weighted VQ, for a single input data vector */ -void silk_VQ_WMat_EC_c( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -); - -#if !defined(OVERRIDE_silk_VQ_WMat_EC) -#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) -#endif - -/************************************/ -/* Noise shaping quantization (NSQ) */ -/************************************/ - -void silk_NSQ_c( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if !defined(OVERRIDE_silk_NSQ) -#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#endif - -/* Noise shaping using delayed decision */ -void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if !defined(OVERRIDE_silk_NSQ_del_dec) -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#endif - -/************/ -/* Silk VAD */ -/************/ -/* Initialize the Silk VAD */ -opus_int silk_VAD_Init( /* O Return value, 0 if success */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -); - -/* Get speech activity level in Q8 */ -opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */ - silk_encoder_state *psEncC, /* I/O Encoder state */ - const opus_int16 pIn[] /* I PCM input */ -); - -#if !defined(OVERRIDE_silk_VAD_GetSA_Q8) -#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_c(psEnC, pIn)) -#endif - -/* Low-pass filter with variable cutoff frequency based on */ -/* piece-wise linear interpolation between elliptic filters */ -/* Start by setting transition_frame_no = 1; */ -void silk_LP_variable_cutoff( - silk_LP_state *psLP, /* I/O LP filter state */ - opus_int16 *frame, /* I/O Low-pass filtered output signal */ - const opus_int frame_length /* I Frame length */ -); - -/******************/ -/* NLSF Quantizer */ -/******************/ -/* Limit, stabilize, convert and quantize NLSFs */ -void silk_process_NLSFs( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -); - -opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */ - const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */ - const opus_int nSurvivors, /* I Max survivors after first stage */ - const opus_int signalType /* I Signal type: 0/1/2 */ -); - -/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ -void silk_NLSF_VQ( - opus_int32 err_Q26[], /* O Quantization errors [K] */ - const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ - const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ - const opus_int K, /* I Number of codebook vectors */ - const opus_int LPC_order /* I Number of LPCs */ -); - -/* Delayed-decision quantizer for NLSF residuals */ -opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */ - opus_int8 indices[], /* O Quantization indices [ order ] */ - const opus_int16 x_Q10[], /* I Input [ order ] */ - const opus_int16 w_Q5[], /* I Weights [ order ] */ - const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ - const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */ - const opus_uint8 ec_rates_Q5[], /* I Rates [] */ - const opus_int quant_step_size_Q16, /* I Quantization step size */ - const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */ - const opus_int32 mu_Q20, /* I R/D tradeoff */ - const opus_int16 order /* I Number of input values */ -); - -/* Unpack predictor values and indices for entropy coding tables */ -void silk_NLSF_unpack( - opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */ - opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int CB1_index /* I Index of vector in first LSF codebook */ -); - -/***********************/ -/* NLSF vector decoder */ -/***********************/ -void silk_NLSF_decode( - opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ -); - -/****************************************************/ -/* Decoder Functions */ -/****************************************************/ -opus_int silk_init_decoder( - silk_decoder_state *psDec /* I/O Decoder state pointer */ -); - -/* Set decoder sampling rate */ -opus_int silk_decoder_set_fs( - silk_decoder_state *psDec, /* I/O Decoder state pointer */ - opus_int fs_kHz, /* I Sampling frequency (kHz) */ - opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */ -); - -/****************/ -/* Decode frame */ -/****************/ -opus_int silk_decode_frame( - silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pOut[], /* O Pointer to output speech frame */ - opus_int32 *pN, /* O Pointer to size of output frame */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int condCoding, /* I The type of conditional coding to use */ - int arch /* I Run-time architecture */ -); - -/* Decode indices from bitstream */ -void silk_decode_indices( - silk_decoder_state *psDec, /* I/O State */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* Decode parameters from payload */ -void silk_decode_parameters( - silk_decoder_state *psDec, /* I/O State */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* Core decoder. Performs inverse NSQ operation LTP + LPC */ -void silk_decode_core( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I Decoder control */ - opus_int16 xq[], /* O Decoded speech */ - const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */ - int arch /* I Run-time architecture */ -); - -/* Decode quantization indices of excitation (Shell coding) */ -void silk_decode_pulses( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* O Excitation signal */ - const opus_int signalType, /* I Sigtype */ - const opus_int quantOffsetType, /* I quantOffsetType */ - const opus_int frame_length /* I Frame length */ -); - -/******************/ -/* CNG */ -/******************/ - -/* Reset CNG */ -void silk_CNG_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -); - -/* Updates CNG estimate, and applies the CNG when packet was lost */ -void silk_CNG( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O Signal */ - opus_int length /* I Length of residual */ -); - -/* Encoding of various parameters */ -void silk_encode_indices( - silk_encoder_state *psEncC, /* I/O Encoder state */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -#endif diff --git a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h b/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h deleted file mode 100644 index ad1cfe2a9b..0000000000 --- a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h +++ /dev/null @@ -1,409 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef __NSQ_DEL_DEC_MIPSR1_H__ -#define __NSQ_DEL_DEC_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -#define OVERRIDE_silk_noise_shape_quantizer_del_dec -static inline void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; - NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ]; - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4; - opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6; - opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13; - opus_int16 a_Q12_14, a_Q12_15; - - opus_int32 cur, prev, next; - - /*Unused.*/ - (void)arch; - - //Intialize b_Q14 variables - b_Q14_0 = b_Q14[ 0 ]; - b_Q14_1 = b_Q14[ 1 ]; - b_Q14_2 = b_Q14[ 2 ]; - b_Q14_3 = b_Q14[ 3 ]; - b_Q14_4 = b_Q14[ 4 ]; - - //Intialize a_Q12 variables - a_Q12_0 = a_Q12[0]; - a_Q12_1 = a_Q12[1]; - a_Q12_2 = a_Q12[2]; - a_Q12_3 = a_Q12[3]; - a_Q12_4 = a_Q12[4]; - a_Q12_5 = a_Q12[5]; - a_Q12_6 = a_Q12[6]; - a_Q12_7 = a_Q12[7]; - a_Q12_8 = a_Q12[8]; - a_Q12_9 = a_Q12[9]; - a_Q12_10 = a_Q12[10]; - a_Q12_11 = a_Q12[11]; - a_Q12_12 = a_Q12[12]; - a_Q12_13 = a_Q12[13]; - a_Q12_14 = a_Q12[14]; - a_Q12_15 = a_Q12[15]; - - long long temp64; - - silk_assert( nStatesDelayedDecision > 0 ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 ); - temp64 += 32768; - LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 ); - if( predictLPCOrder == 16 ) { - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -15 ], a_Q12_15 ); - } - temp64 += 32768; - LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - - temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] ); - - prev = psDD->sAR2_Q14[ 1 ]; - - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - cur = psDD->sAR2_Q14[ j ]; - next = psDD->sAR2_Q14[ j+1 ]; - /* Output of allpass section */ - tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] ); - temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - prev = next; - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - temp64 += 32768; - n_AR_Q14 = __builtin_mips_extr_w(temp64, 16); - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } -} - -#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/mips/macros_mipsr1.h b/thirdparty/opus/silk/mips/macros_mipsr1.h deleted file mode 100644 index 12ed981a6e..0000000000 --- a/thirdparty/opus/silk/mips/macros_mipsr1.h +++ /dev/null @@ -1,92 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - - -#ifndef __SILK_MACROS_MIPSR1_H__ -#define __SILK_MACROS_MIPSR1_H__ - -#define mips_clz(x) __builtin_clz(x) - -#undef silk_SMULWB -static inline int silk_SMULWB(int a, int b) -{ - long long ac; - int c; - - ac = __builtin_mips_mult(a, (opus_int32)(opus_int16)b); - c = __builtin_mips_extr_w(ac, 16); - - return c; -} - -#undef silk_SMLAWB -#define silk_SMLAWB(a32, b32, c32) ((a32) + silk_SMULWB(b32, c32)) - -#undef silk_SMULWW -static inline int silk_SMULWW(int a, int b) -{ - long long ac; - int c; - - ac = __builtin_mips_mult(a, b); - c = __builtin_mips_extr_w(ac, 16); - - return c; -} - -#undef silk_SMLAWW -static inline int silk_SMLAWW(int a, int b, int c) -{ - long long ac; - int res; - - ac = __builtin_mips_mult(b, c); - res = __builtin_mips_extr_w(ac, 16); - res += a; - - return res; -} - -#define OVERRIDE_silk_CLZ16 -static inline opus_int32 silk_CLZ16(opus_int16 in16) -{ - int re32; - opus_int32 in32 = (opus_int32 )in16; - re32 = mips_clz(in32); - re32-=16; - return re32; -} - -#define OVERRIDE_silk_CLZ32 -static inline opus_int32 silk_CLZ32(opus_int32 in32) -{ - int re32; - re32 = mips_clz(in32); - return re32; -} - -#endif /* __SILK_MACROS_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h b/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h deleted file mode 100644 index 3b0a695365..0000000000 --- a/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h +++ /dev/null @@ -1,65 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_MIPSR1_H -#define SILK_SIGPROC_FIX_MIPSR1_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#undef silk_SAT16 -static inline short int silk_SAT16(int a) -{ - int c; - c = __builtin_mips_shll_s_w(a, 16); - c = c>>16; - - return c; -} - -#undef silk_LSHIFT_SAT32 -static inline int silk_LSHIFT_SAT32(int a, int shift) -{ - int r; - - r = __builtin_mips_shll_s_w(a, shift); - - return r; -} - -#undef silk_RSHIFT_ROUND -static inline int silk_RSHIFT_ROUND(int a, int shift) -{ - int r; - - r = __builtin_mips_shra_r_w(a, shift); - return r; -} - -#endif /* SILK_SIGPROC_FIX_MIPSR1_H */ diff --git a/thirdparty/opus/silk/pitch_est_defines.h b/thirdparty/opus/silk/pitch_est_defines.h deleted file mode 100644 index e1e4b5d768..0000000000 --- a/thirdparty/opus/silk/pitch_est_defines.h +++ /dev/null @@ -1,88 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_PE_DEFINES_H -#define SILK_PE_DEFINES_H - -#include "SigProc_FIX.h" - -/********************************************************/ -/* Definitions for pitch estimator */ -/********************************************************/ - -#define PE_MAX_FS_KHZ 16 /* Maximum sampling frequency used */ - -#define PE_MAX_NB_SUBFR 4 -#define PE_SUBFR_LENGTH_MS 5 /* 5 ms */ - -#define PE_LTP_MEM_LENGTH_MS ( 4 * PE_SUBFR_LENGTH_MS ) - -#define PE_MAX_FRAME_LENGTH_MS ( PE_LTP_MEM_LENGTH_MS + PE_MAX_NB_SUBFR * PE_SUBFR_LENGTH_MS ) -#define PE_MAX_FRAME_LENGTH ( PE_MAX_FRAME_LENGTH_MS * PE_MAX_FS_KHZ ) -#define PE_MAX_FRAME_LENGTH_ST_1 ( PE_MAX_FRAME_LENGTH >> 2 ) -#define PE_MAX_FRAME_LENGTH_ST_2 ( PE_MAX_FRAME_LENGTH >> 1 ) - -#define PE_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */ -#define PE_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */ -#define PE_MAX_LAG ( PE_MAX_LAG_MS * PE_MAX_FS_KHZ ) -#define PE_MIN_LAG ( PE_MIN_LAG_MS * PE_MAX_FS_KHZ ) - -#define PE_D_SRCH_LENGTH 24 - -#define PE_NB_STAGE3_LAGS 5 - -#define PE_NB_CBKS_STAGE2 3 -#define PE_NB_CBKS_STAGE2_EXT 11 - -#define PE_NB_CBKS_STAGE3_MAX 34 -#define PE_NB_CBKS_STAGE3_MID 24 -#define PE_NB_CBKS_STAGE3_MIN 16 - -#define PE_NB_CBKS_STAGE3_10MS 12 -#define PE_NB_CBKS_STAGE2_10MS 3 - -#define PE_SHORTLAG_BIAS 0.2f /* for logarithmic weighting */ -#define PE_PREVLAG_BIAS 0.2f /* for logarithmic weighting */ -#define PE_FLATCONTOUR_BIAS 0.05f - -#define SILK_PE_MIN_COMPLEX 0 -#define SILK_PE_MID_COMPLEX 1 -#define SILK_PE_MAX_COMPLEX 2 - -/* Tables for 20 ms frames */ -extern const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ]; -extern const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ]; -extern const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ]; -extern const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ]; - -/* Tables for 10 ms frames */ -extern const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ 3 ]; -extern const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 12 ]; -extern const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ]; - -#endif - diff --git a/thirdparty/opus/silk/pitch_est_tables.c b/thirdparty/opus/silk/pitch_est_tables.c deleted file mode 100644 index 81a8bacaca..0000000000 --- a/thirdparty/opus/silk/pitch_est_tables.c +++ /dev/null @@ -1,99 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "typedef.h" -#include "pitch_est_defines.h" - -const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] = -{ - {0, 1, 0}, - {0, 0, 1} -}; - -const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ] = -{ - { 0, 0, 1,-1, 1,-1, 2,-2, 2,-2, 3,-3}, - { 0, 1, 0, 1,-1, 2,-1, 2,-2, 3,-2, 3} -}; - -const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ] = -{ - {-3, 7}, - {-2, 7} -}; - -const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ] = -{ - {0, 2,-1,-1,-1, 0, 0, 1, 1, 0, 1}, - {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, - {0,-1, 2, 1, 0, 1, 1, 0, 0,-1,-1} -}; - -const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ] = -{ - {0, 0, 1,-1, 0, 1,-1, 0,-1, 1,-2, 2,-2,-2, 2,-3, 2, 3,-3,-4, 3,-4, 4, 4,-5, 5,-6,-5, 6,-7, 6, 5, 8,-9}, - {0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0, 0, 1,-1, 0, 1,-1,-1, 1,-1, 2, 1,-1, 2,-2,-2, 2,-2, 2, 2, 3,-3}, - {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,-1, 1, 0, 0, 2, 1,-1, 2,-1,-1, 2,-1, 2, 2,-1, 3,-2,-2,-2, 3}, - {0, 1, 0, 0, 1, 0, 1,-1, 2,-1, 2,-1, 2, 3,-2, 3,-2,-2, 4, 4,-3, 5,-3,-4, 6,-4, 6, 5,-5, 8,-6,-5,-7, 9} -}; - -const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ] = -{ - /* Lags to search for low number of stage3 cbks */ - { - {-5,8}, - {-1,6}, - {-1,6}, - {-4,10} - }, - /* Lags to search for middle number of stage3 cbks */ - { - {-6,10}, - {-2,6}, - {-1,6}, - {-5,10} - }, - /* Lags to search for max number of stage3 cbks */ - { - {-9,12}, - {-3,7}, - {-2,7}, - {-7,13} - } -}; - -const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ] = -{ - PE_NB_CBKS_STAGE3_MIN, - PE_NB_CBKS_STAGE3_MID, - PE_NB_CBKS_STAGE3_MAX -}; diff --git a/thirdparty/opus/silk/process_NLSFs.c b/thirdparty/opus/silk/process_NLSFs.c deleted file mode 100644 index 0ab71f0163..0000000000 --- a/thirdparty/opus/silk/process_NLSFs.c +++ /dev/null @@ -1,107 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Limit, stabilize, convert and quantize NLSFs */ -void silk_process_NLSFs( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -) -{ - opus_int i, doInterpolate; - opus_int NLSF_mu_Q20; - opus_int16 i_sqr_Q15; - opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ]; - opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ]; - opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ]; - - silk_assert( psEncC->speech_activity_Q8 >= 0 ); - silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) ); - silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) ); - - /***********************/ - /* Calculate mu values */ - /***********************/ - /* NLSF_mu = 0.003 - 0.0015 * psEnc->speech_activity; */ - NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 ); - if( psEncC->nb_subfr == 2 ) { - /* Multiply by 1.5 for 10 ms packets */ - NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 ); - } - - silk_assert( NLSF_mu_Q20 > 0 ); - silk_assert( NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) ); - - /* Calculate NLSF weights */ - silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder ); - - /* Update NLSF weights for interpolated NLSFs */ - doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 ); - if( doInterpolate ) { - /* Calculate the interpolated NLSF vector for the first half */ - silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15, - psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); - - /* Calculate first half NLSF weights for the interpolated NLSFs */ - silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder ); - - /* Update NLSF weights with contribution from first half */ - i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 ); - for( i = 0; i < psEncC->predictLPCOrder; i++ ) { - pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT( - silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) ); - silk_assert( pNLSFW_QW[ i ] >= 1 ); - } - } - - silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW, - NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType ); - - /* Convert quantized NLSFs back to LPC coefficients */ - silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder ); - - if( doInterpolate ) { - /* Calculate the interpolated, quantized LSF vector for the first half */ - silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15, - psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); - - /* Convert back to LPC coefficients */ - silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder ); - - } else { - /* Copy LPC coefficients for first half from second half */ - silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER ); - silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) ); - } -} diff --git a/thirdparty/opus/silk/quant_LTP_gains.c b/thirdparty/opus/silk/quant_LTP_gains.c deleted file mode 100644 index 513a8c4468..0000000000 --- a/thirdparty/opus/silk/quant_LTP_gains.c +++ /dev/null @@ -1,129 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "tuning_parameters.h" - -void silk_quant_LTP_gains( - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ - opus_int8 *periodicity_index, /* O Periodicity Index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ - opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ - opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -) -{ - opus_int j, k, cbk_size; - opus_int8 temp_idx[ MAX_NB_SUBFR ]; - const opus_uint8 *cl_ptr_Q5; - const opus_int8 *cbk_ptr_Q7; - const opus_uint8 *cbk_gain_ptr_Q7; - const opus_int16 *b_Q14_ptr; - const opus_int32 *W_Q18_ptr; - opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14; - opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7; - - /***************************************************/ - /* iterate over different codebooks with different */ - /* rates/distortions, and choose best */ - /***************************************************/ - min_rate_dist_Q14 = silk_int32_MAX; - best_sum_log_gain_Q7 = 0; - for( k = 0; k < 3; k++ ) { - /* Safety margin for pitch gain control, to take into account factors - such as state rescaling/rewhitening. */ - opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 ); - - cl_ptr_Q5 = silk_LTP_gain_BITS_Q5_ptrs[ k ]; - cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ k ]; - cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ]; - cbk_size = silk_LTP_vq_sizes[ k ]; - - /* Set up pointer to first subframe */ - W_Q18_ptr = W_Q18; - b_Q14_ptr = B_Q14; - - rate_dist_Q14 = 0; - sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; - for( j = 0; j < nb_subfr; j++ ) { - max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) - + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; - - silk_VQ_WMat_EC( - &temp_idx[ j ], /* O index of best codebook vector */ - &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */ - &gain_Q7, /* O sum of absolute LTP coefficients */ - b_Q14_ptr, /* I input vector to be quantized */ - W_Q18_ptr, /* I weighting matrix */ - cbk_ptr_Q7, /* I codebook */ - cbk_gain_ptr_Q7, /* I codebook effective gains */ - cl_ptr_Q5, /* I code length for each codebook vector */ - mu_Q9, /* I tradeoff between weighted error and rate */ - max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - cbk_size, /* I number of vectors in codebook */ - arch /* I Run-time architecture */ - ); - - rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr ); - sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7 - + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 )); - - b_Q14_ptr += LTP_ORDER; - W_Q18_ptr += LTP_ORDER * LTP_ORDER; - } - - /* Avoid never finding a codebook */ - rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 ); - - if( rate_dist_Q14 < min_rate_dist_Q14 ) { - min_rate_dist_Q14 = rate_dist_Q14; - *periodicity_index = (opus_int8)k; - silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) ); - best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7; - } - - /* Break early in low-complexity mode if rate distortion is below threshold */ - if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) { - break; - } - } - - cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ]; - for( j = 0; j < nb_subfr; j++ ) { - for( k = 0; k < LTP_ORDER; k++ ) { - B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 ); - } - } - *sum_log_gain_Q7 = best_sum_log_gain_Q7; -} diff --git a/thirdparty/opus/silk/resampler.c b/thirdparty/opus/silk/resampler.c deleted file mode 100644 index 374fbb3722..0000000000 --- a/thirdparty/opus/silk/resampler.c +++ /dev/null @@ -1,215 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* - * Matrix of resampling methods used: - * Fs_out (kHz) - * 8 12 16 24 48 - * - * 8 C UF U UF UF - * 12 AF C UF U UF - * Fs_in (kHz) 16 D AF C UF UF - * 24 AF D AF C U - * 48 AF AF AF D C - * - * C -> Copy (no resampling) - * D -> Allpass-based 2x downsampling - * U -> Allpass-based 2x upsampling - * UF -> Allpass-based 2x upsampling followed by FIR interpolation - * AF -> AR2 filter followed by FIR interpolation - */ - -#include "resampler_private.h" - -/* Tables with delay compensation values to equalize total delay for different modes */ -static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = { -/* in \ out 8 12 16 */ -/* 8 */ { 6, 0, 3 }, -/* 12 */ { 0, 7, 3 }, -/* 16 */ { 0, 1, 10 }, -/* 24 */ { 0, 2, 6 }, -/* 48 */ { 18, 10, 12 } -}; - -static const opus_int8 delay_matrix_dec[ 3 ][ 5 ] = { -/* in \ out 8 12 16 24 48 */ -/* 8 */ { 4, 0, 2, 0, 0 }, -/* 12 */ { 0, 9, 4, 7, 4 }, -/* 16 */ { 0, 3, 12, 7, 7 } -}; - -/* Simple way to make [8000, 12000, 16000, 24000, 48000] to [0, 1, 2, 3, 4] */ -#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 ) - -#define USE_silk_resampler_copy (0) -#define USE_silk_resampler_private_up2_HQ_wrapper (1) -#define USE_silk_resampler_private_IIR_FIR (2) -#define USE_silk_resampler_private_down_FIR (3) - -/* Initialize/reset the resampler state for a given pair of input/output sampling rates */ -opus_int silk_resampler_init( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ - opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ - opus_int forEnc /* I If 1: encoder; if 0: decoder */ -) -{ - opus_int up2x; - - /* Clear state */ - silk_memset( S, 0, sizeof( silk_resampler_state_struct ) ); - - /* Input checking */ - if( forEnc ) { - if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 && Fs_Hz_in != 24000 && Fs_Hz_in != 48000 ) || - ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) { - silk_assert( 0 ); - return -1; - } - S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; - } else { - if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 ) || - ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) { - silk_assert( 0 ); - return -1; - } - S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; - } - - S->Fs_in_kHz = silk_DIV32_16( Fs_Hz_in, 1000 ); - S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 ); - - /* Number of samples processed per batch */ - S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS; - - /* Find resampler with the right sampling ratio */ - up2x = 0; - if( Fs_Hz_out > Fs_Hz_in ) { - /* Upsample */ - if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 1 */ - /* Special case: directly use 2x upsampler */ - S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper; - } else { - /* Default resampler */ - S->resampler_function = USE_silk_resampler_private_IIR_FIR; - up2x = 1; - } - } else if ( Fs_Hz_out < Fs_Hz_in ) { - /* Downsample */ - S->resampler_function = USE_silk_resampler_private_down_FIR; - if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) { /* Fs_out : Fs_in = 3 : 4 */ - S->FIR_Fracs = 3; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0; - S->Coefs = silk_Resampler_3_4_COEFS; - } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 3 */ - S->FIR_Fracs = 2; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0; - S->Coefs = silk_Resampler_2_3_COEFS; - } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 2 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1; - S->Coefs = silk_Resampler_1_2_COEFS; - } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 3 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; - S->Coefs = silk_Resampler_1_3_COEFS; - } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 4 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; - S->Coefs = silk_Resampler_1_4_COEFS; - } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 6 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; - S->Coefs = silk_Resampler_1_6_COEFS; - } else { - /* None available */ - silk_assert( 0 ); - return -1; - } - } else { - /* Input and output sampling rates are equal: copy */ - S->resampler_function = USE_silk_resampler_copy; - } - - /* Ratio of input/output samples */ - S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + up2x ), Fs_Hz_out ), 2 ); - /* Make sure the ratio is rounded up */ - while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < silk_LSHIFT32( Fs_Hz_in, up2x ) ) { - S->invRatio_Q16++; - } - - return 0; -} - -/* Resampler: convert from one sampling rate to another */ -/* Input and output sampling rate are at most 48000 Hz */ -opus_int silk_resampler( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -) -{ - opus_int nSamples; - - /* Need at least 1 ms of input data */ - silk_assert( inLen >= S->Fs_in_kHz ); - /* Delay can't exceed the 1 ms of buffering */ - silk_assert( S->inputDelay <= S->Fs_in_kHz ); - - nSamples = S->Fs_in_kHz - S->inputDelay; - - /* Copy to delay buffer */ - silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) ); - - switch( S->resampler_function ) { - case USE_silk_resampler_private_up2_HQ_wrapper: - silk_resampler_private_up2_HQ_wrapper( S, out, S->delayBuf, S->Fs_in_kHz ); - silk_resampler_private_up2_HQ_wrapper( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); - break; - case USE_silk_resampler_private_IIR_FIR: - silk_resampler_private_IIR_FIR( S, out, S->delayBuf, S->Fs_in_kHz ); - silk_resampler_private_IIR_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); - break; - case USE_silk_resampler_private_down_FIR: - silk_resampler_private_down_FIR( S, out, S->delayBuf, S->Fs_in_kHz ); - silk_resampler_private_down_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); - break; - default: - silk_memcpy( out, S->delayBuf, S->Fs_in_kHz * sizeof( opus_int16 ) ); - silk_memcpy( &out[ S->Fs_out_kHz ], &in[ nSamples ], ( inLen - S->Fs_in_kHz ) * sizeof( opus_int16 ) ); - } - - /* Copy to delay buffer */ - silk_memcpy( S->delayBuf, &in[ inLen - S->inputDelay ], S->inputDelay * sizeof( opus_int16 ) ); - - return 0; -} diff --git a/thirdparty/opus/silk/resampler_down2.c b/thirdparty/opus/silk/resampler_down2.c deleted file mode 100644 index cec3634640..0000000000 --- a/thirdparty/opus/silk/resampler_down2.c +++ /dev/null @@ -1,74 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_rom.h" - -/* Downsample by a factor 2 */ -void silk_resampler_down2( - opus_int32 *S, /* I/O State vector [ 2 ] */ - opus_int16 *out, /* O Output signal [ floor(len/2) ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 inLen /* I Number of input samples */ -) -{ - opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 ); - opus_int32 in32, out32, Y, X; - - silk_assert( silk_resampler_down2_0 > 0 ); - silk_assert( silk_resampler_down2_1 < 0 ); - - /* Internal variables and state are in Q10 format */ - for( k = 0; k < len2; k++ ) { - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 ); - - /* All-pass section for even input sample */ - Y = silk_SUB32( in32, S[ 0 ] ); - X = silk_SMLAWB( Y, Y, silk_resampler_down2_1 ); - out32 = silk_ADD32( S[ 0 ], X ); - S[ 0 ] = silk_ADD32( in32, X ); - - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 ); - - /* All-pass section for odd input sample, and add to output of previous section */ - Y = silk_SUB32( in32, S[ 1 ] ); - X = silk_SMULWB( Y, silk_resampler_down2_0 ); - out32 = silk_ADD32( out32, S[ 1 ] ); - out32 = silk_ADD32( out32, X ); - S[ 1 ] = silk_ADD32( in32, X ); - - /* Add, convert back to int16 and store to output */ - out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32, 11 ) ); - } -} - diff --git a/thirdparty/opus/silk/resampler_down2_3.c b/thirdparty/opus/silk/resampler_down2_3.c deleted file mode 100644 index 4342614dcc..0000000000 --- a/thirdparty/opus/silk/resampler_down2_3.c +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" -#include "stack_alloc.h" - -#define ORDER_FIR 4 - -/* Downsample by a factor 2/3, low quality */ -void silk_resampler_down2_3( - opus_int32 *S, /* I/O State vector [ 6 ] */ - opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ - const opus_int16 *in, /* I Input signal [ inLen ] */ - opus_int32 inLen /* I Number of input samples */ -) -{ - opus_int32 nSamplesIn, counter, res_Q6; - VARDECL( opus_int32, buf ); - opus_int32 *buf_ptr; - SAVE_STACK; - - ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 ); - - /* Copy buffered samples to start of buffer */ - silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) ); - - /* Iterate over blocks of frameSizeIn input samples */ - while( 1 ) { - nSamplesIn = silk_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN ); - - /* Second-order AR filter (output in Q8) */ - silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in, - silk_Resampler_2_3_COEFS_LQ, nSamplesIn ); - - /* Interpolate filtered signal */ - buf_ptr = buf; - counter = nSamplesIn; - while( counter > 2 ) { - /* Inner product */ - res_Q6 = silk_SMULWB( buf_ptr[ 0 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - - res_Q6 = silk_SMULWB( buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - - buf_ptr += 3; - counter -= 3; - } - - in += nSamplesIn; - inLen -= nSamplesIn; - - if( inLen > 0 ) { - /* More iterations to do; copy last part of filtered signal to beginning of buffer */ - silk_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) ); - } else { - break; - } - } - - /* Copy last part of filtered signal to the state for the next call */ - silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/resampler_private.h b/thirdparty/opus/silk/resampler_private.h deleted file mode 100644 index 422a7d9d95..0000000000 --- a/thirdparty/opus/silk/resampler_private.h +++ /dev/null @@ -1,88 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_RESAMPLER_PRIVATE_H -#define SILK_RESAMPLER_PRIVATE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "SigProc_FIX.h" -#include "resampler_structs.h" -#include "resampler_rom.h" - -/* Number of input samples to process in the inner loop */ -#define RESAMPLER_MAX_BATCH_SIZE_MS 10 -#define RESAMPLER_MAX_FS_KHZ 48 -#define RESAMPLER_MAX_BATCH_SIZE_IN ( RESAMPLER_MAX_BATCH_SIZE_MS * RESAMPLER_MAX_FS_KHZ ) - -/* Description: Hybrid IIR/FIR polyphase implementation of resampling */ -void silk_resampler_private_IIR_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -); - -/* Description: Hybrid IIR/FIR polyphase implementation of resampling */ -void silk_resampler_private_down_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -); - -/* Upsample by a factor 2, high quality */ -void silk_resampler_private_up2_HQ_wrapper( - void *SS, /* I/O Resampler state (unused) */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -); - -/* Upsample by a factor 2, high quality */ -void silk_resampler_private_up2_HQ( - opus_int32 *S, /* I/O Resampler state [ 6 ] */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -); - -/* Second order AR filter */ -void silk_resampler_private_AR2( - opus_int32 S[], /* I/O State vector [ 2 ] */ - opus_int32 out_Q8[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - const opus_int16 A_Q14[], /* I AR coefficients, Q14 */ - opus_int32 len /* I Signal length */ -); - -#ifdef __cplusplus -} -#endif -#endif /* SILK_RESAMPLER_PRIVATE_H */ diff --git a/thirdparty/opus/silk/resampler_private_AR2.c b/thirdparty/opus/silk/resampler_private_AR2.c deleted file mode 100644 index 5fff23714f..0000000000 --- a/thirdparty/opus/silk/resampler_private_AR2.c +++ /dev/null @@ -1,55 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" - -/* Second order AR filter with single delay elements */ -void silk_resampler_private_AR2( - opus_int32 S[], /* I/O State vector [ 2 ] */ - opus_int32 out_Q8[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - const opus_int16 A_Q14[], /* I AR coefficients, Q14 */ - opus_int32 len /* I Signal length */ -) -{ - opus_int32 k; - opus_int32 out32; - - for( k = 0; k < len; k++ ) { - out32 = silk_ADD_LSHIFT32( S[ 0 ], (opus_int32)in[ k ], 8 ); - out_Q8[ k ] = out32; - out32 = silk_LSHIFT( out32, 2 ); - S[ 0 ] = silk_SMLAWB( S[ 1 ], out32, A_Q14[ 0 ] ); - S[ 1 ] = silk_SMULWB( out32, A_Q14[ 1 ] ); - } -} - diff --git a/thirdparty/opus/silk/resampler_private_IIR_FIR.c b/thirdparty/opus/silk/resampler_private_IIR_FIR.c deleted file mode 100644 index 6b2b3a2e18..0000000000 --- a/thirdparty/opus/silk/resampler_private_IIR_FIR.c +++ /dev/null @@ -1,107 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" -#include "stack_alloc.h" - -static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL( - opus_int16 *out, - opus_int16 *buf, - opus_int32 max_index_Q16, - opus_int32 index_increment_Q16 -) -{ - opus_int32 index_Q16, res_Q15; - opus_int16 *buf_ptr; - opus_int32 table_index; - - /* Interpolate upsampled signal and store in output array */ - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - table_index = silk_SMULWB( index_Q16 & 0xFFFF, 12 ); - buf_ptr = &buf[ index_Q16 >> 16 ]; - - res_Q15 = silk_SMULBB( buf_ptr[ 0 ], silk_resampler_frac_FIR_12[ table_index ][ 0 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 1 ], silk_resampler_frac_FIR_12[ table_index ][ 1 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 2 ], silk_resampler_frac_FIR_12[ table_index ][ 2 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 3 ], silk_resampler_frac_FIR_12[ table_index ][ 3 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 4 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 3 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 5 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 2 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 6 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 1 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 7 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 0 ] ); - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q15, 15 ) ); - } - return out; -} -/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */ -void silk_resampler_private_IIR_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -) -{ - silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; - opus_int32 nSamplesIn; - opus_int32 max_index_Q16, index_increment_Q16; - VARDECL( opus_int16, buf ); - SAVE_STACK; - - ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); - - /* Copy buffered samples to start of buffer */ - silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); - - /* Iterate over blocks of frameSizeIn input samples */ - index_increment_Q16 = S->invRatio_Q16; - while( 1 ) { - nSamplesIn = silk_min( inLen, S->batchSize ); - - /* Upsample 2x */ - silk_resampler_private_up2_HQ( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_12 ], in, nSamplesIn ); - - max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 + 1 ); /* + 1 because 2x upsampling */ - out = silk_resampler_private_IIR_FIR_INTERPOL( out, buf, max_index_Q16, index_increment_Q16 ); - in += nSamplesIn; - inLen -= nSamplesIn; - - if( inLen > 0 ) { - /* More iterations to do; copy last part of filtered signal to beginning of buffer */ - silk_memcpy( buf, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); - } else { - break; - } - } - - /* Copy last part of filtered signal to the state for the next call */ - silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/resampler_private_down_FIR.c b/thirdparty/opus/silk/resampler_private_down_FIR.c deleted file mode 100644 index 783e42b356..0000000000 --- a/thirdparty/opus/silk/resampler_private_down_FIR.c +++ /dev/null @@ -1,194 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" -#include "stack_alloc.h" - -static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL( - opus_int16 *out, - opus_int32 *buf, - const opus_int16 *FIR_Coefs, - opus_int FIR_Order, - opus_int FIR_Fracs, - opus_int32 max_index_Q16, - opus_int32 index_increment_Q16 -) -{ - opus_int32 index_Q16, res_Q6; - opus_int32 *buf_ptr; - opus_int32 interpol_ind; - const opus_int16 *interpol_ptr; - - switch( FIR_Order ) { - case RESAMPLER_DOWN_ORDER_FIR0: - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - /* Integer part gives pointer to buffered input */ - buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); - - /* Fractional part gives interpolation coefficients */ - interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs ); - - /* Inner product */ - interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ]; - res_Q6 = silk_SMULWB( buf_ptr[ 0 ], interpol_ptr[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] ); - interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ]; - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 14 ], interpol_ptr[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 9 ], interpol_ptr[ 8 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - } - break; - case RESAMPLER_DOWN_ORDER_FIR1: - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - /* Integer part gives pointer to buffered input */ - buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); - - /* Inner product */ - res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 23 ] ), FIR_Coefs[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 22 ] ), FIR_Coefs[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 21 ] ), FIR_Coefs[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 20 ] ), FIR_Coefs[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 19 ] ), FIR_Coefs[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 18 ] ), FIR_Coefs[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 17 ] ), FIR_Coefs[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 16 ] ), FIR_Coefs[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 15 ] ), FIR_Coefs[ 8 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 14 ] ), FIR_Coefs[ 9 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - } - break; - case RESAMPLER_DOWN_ORDER_FIR2: - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - /* Integer part gives pointer to buffered input */ - buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); - - /* Inner product */ - res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 35 ] ), FIR_Coefs[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 34 ] ), FIR_Coefs[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 33 ] ), FIR_Coefs[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 32 ] ), FIR_Coefs[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 31 ] ), FIR_Coefs[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 30 ] ), FIR_Coefs[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 29 ] ), FIR_Coefs[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 28 ] ), FIR_Coefs[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 27 ] ), FIR_Coefs[ 8 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 26 ] ), FIR_Coefs[ 9 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - } - break; - default: - silk_assert( 0 ); - } - return out; -} - -/* Resample with a 2nd order AR filter followed by FIR interpolation */ -void silk_resampler_private_down_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -) -{ - silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; - opus_int32 nSamplesIn; - opus_int32 max_index_Q16, index_increment_Q16; - VARDECL( opus_int32, buf ); - const opus_int16 *FIR_Coefs; - SAVE_STACK; - - ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 ); - - /* Copy buffered samples to start of buffer */ - silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) ); - - FIR_Coefs = &S->Coefs[ 2 ]; - - /* Iterate over blocks of frameSizeIn input samples */ - index_increment_Q16 = S->invRatio_Q16; - while( 1 ) { - nSamplesIn = silk_min( inLen, S->batchSize ); - - /* Second-order AR filter (output in Q8) */ - silk_resampler_private_AR2( S->sIIR, &buf[ S->FIR_Order ], in, S->Coefs, nSamplesIn ); - - max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 ); - - /* Interpolate filtered signal */ - out = silk_resampler_private_down_FIR_INTERPOL( out, buf, FIR_Coefs, S->FIR_Order, - S->FIR_Fracs, max_index_Q16, index_increment_Q16 ); - - in += nSamplesIn; - inLen -= nSamplesIn; - - if( inLen > 1 ) { - /* More iterations to do; copy last part of filtered signal to beginning of buffer */ - silk_memcpy( buf, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) ); - } else { - break; - } - } - - /* Copy last part of filtered signal to the state for the next call */ - silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/resampler_private_up2_HQ.c b/thirdparty/opus/silk/resampler_private_up2_HQ.c deleted file mode 100644 index c7ec8de365..0000000000 --- a/thirdparty/opus/silk/resampler_private_up2_HQ.c +++ /dev/null @@ -1,113 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" - -/* Upsample by a factor 2, high quality */ -/* Uses 2nd order allpass filters for the 2x upsampling, followed by a */ -/* notch filter just above Nyquist. */ -void silk_resampler_private_up2_HQ( - opus_int32 *S, /* I/O Resampler state [ 6 ] */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -) -{ - opus_int32 k; - opus_int32 in32, out32_1, out32_2, Y, X; - - silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 ); - silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 ); - silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 ); - silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 ); - silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 ); - silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 ); - - /* Internal variables and state are in Q10 format */ - for( k = 0; k < len; k++ ) { - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 ); - - /* First all-pass section for even output sample */ - Y = silk_SUB32( in32, S[ 0 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] ); - out32_1 = silk_ADD32( S[ 0 ], X ); - S[ 0 ] = silk_ADD32( in32, X ); - - /* Second all-pass section for even output sample */ - Y = silk_SUB32( out32_1, S[ 1 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] ); - out32_2 = silk_ADD32( S[ 1 ], X ); - S[ 1 ] = silk_ADD32( out32_1, X ); - - /* Third all-pass section for even output sample */ - Y = silk_SUB32( out32_2, S[ 2 ] ); - X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] ); - out32_1 = silk_ADD32( S[ 2 ], X ); - S[ 2 ] = silk_ADD32( out32_2, X ); - - /* Apply gain in Q15, convert back to int16 and store to output */ - out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); - - /* First all-pass section for odd output sample */ - Y = silk_SUB32( in32, S[ 3 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] ); - out32_1 = silk_ADD32( S[ 3 ], X ); - S[ 3 ] = silk_ADD32( in32, X ); - - /* Second all-pass section for odd output sample */ - Y = silk_SUB32( out32_1, S[ 4 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] ); - out32_2 = silk_ADD32( S[ 4 ], X ); - S[ 4 ] = silk_ADD32( out32_1, X ); - - /* Third all-pass section for odd output sample */ - Y = silk_SUB32( out32_2, S[ 5 ] ); - X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] ); - out32_1 = silk_ADD32( S[ 5 ], X ); - S[ 5 ] = silk_ADD32( out32_2, X ); - - /* Apply gain in Q15, convert back to int16 and store to output */ - out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); - } -} - -void silk_resampler_private_up2_HQ_wrapper( - void *SS, /* I/O Resampler state (unused) */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -) -{ - silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; - silk_resampler_private_up2_HQ( S->sIIR, out, in, len ); -} diff --git a/thirdparty/opus/silk/resampler_rom.c b/thirdparty/opus/silk/resampler_rom.c deleted file mode 100644 index 5e6b04476a..0000000000 --- a/thirdparty/opus/silk/resampler_rom.c +++ /dev/null @@ -1,96 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Filter coefficients for IIR/FIR polyphase resampling * - * Total size: 179 Words (358 Bytes) */ - -#include "resampler_private.h" - -/* Matlab code for the notch filter coefficients: */ -/* B = [1, 0.147, 1]; A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */ -/* fprintf('\t%6d, %6d, %6d, %6d\n', round(B(2)*2^16), round(-A(2)*2^16), round((1-A(3))*2^16), round(G*2^15)) */ -/* const opus_int16 silk_resampler_up2_hq_notch[ 4 ] = { 9634, -7012, 7209, 30474 }; */ - -/* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */ -silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { - -20694, -13867, - -49, 64, 17, -157, 353, -496, 163, 11047, 22205, - -39, 6, 91, -170, 186, 23, -896, 6336, 19928, - -19, -36, 102, -89, -24, 328, -951, 2568, 15909, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { - -14457, -14019, - 64, 128, -122, 36, 310, -768, 584, 9267, 17733, - 12, 128, 18, -142, 288, -117, -865, 4123, 14459, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = { - 616, -14323, - -10, 39, 58, -46, -84, 120, 184, -315, -541, 1284, 5380, 9024, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 16102, -15162, - -13, 0, 20, 26, 5, -31, -43, -4, 65, 90, 7, -157, -248, -44, 593, 1583, 2612, 3271, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 22500, -15099, - 3, -14, -20, -15, 2, 25, 37, 25, -16, -71, -107, -79, 50, 292, 623, 982, 1288, 1464, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 27540, -15257, - 17, 12, 8, 1, -10, -22, -30, -32, -22, 3, 44, 100, 168, 243, 317, 381, 429, 455, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = { - -2797, -6507, - 4697, 10739, - 1567, 8276, -}; - -/* Table with interplation fractions of 1/24, 3/24, 5/24, ... , 23/24 : 23/24 (46 Words) */ -silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = { - { 189, -600, 617, 30567 }, - { 117, -159, -1070, 29704 }, - { 52, 221, -2392, 28276 }, - { -4, 529, -3350, 26341 }, - { -48, 758, -3956, 23973 }, - { -80, 905, -4235, 21254 }, - { -99, 972, -4222, 18278 }, - { -107, 967, -3957, 15143 }, - { -103, 896, -3487, 11950 }, - { -91, 773, -2865, 8798 }, - { -71, 611, -2143, 5784 }, - { -46, 425, -1375, 2996 }, -}; diff --git a/thirdparty/opus/silk/resampler_rom.h b/thirdparty/opus/silk/resampler_rom.h deleted file mode 100644 index 490b3388dc..0000000000 --- a/thirdparty/opus/silk/resampler_rom.h +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_FIX_RESAMPLER_ROM_H -#define SILK_FIX_RESAMPLER_ROM_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "typedef.h" -#include "resampler_structs.h" - -#define RESAMPLER_DOWN_ORDER_FIR0 18 -#define RESAMPLER_DOWN_ORDER_FIR1 24 -#define RESAMPLER_DOWN_ORDER_FIR2 36 -#define RESAMPLER_ORDER_FIR_12 8 - -/* Tables for 2x downsampler */ -static const opus_int16 silk_resampler_down2_0 = 9872; -static const opus_int16 silk_resampler_down2_1 = 39809 - 65536; - -/* Tables for 2x upsampler, high quality */ -static const opus_int16 silk_resampler_up2_hq_0[ 3 ] = { 1746, 14986, 39083 - 65536 }; -static const opus_int16 silk_resampler_up2_hq_1[ 3 ] = { 6854, 25769, 55542 - 65536 }; - -/* Tables with IIR and FIR coefficients for fractional downsamplers */ -extern const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ]; -extern const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ]; -extern const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ]; -extern const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; -extern const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; -extern const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; -extern const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ]; - -/* Table with interplation fractions of 1/24, 3/24, ..., 23/24 */ -extern const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ]; - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_FIX_RESAMPLER_ROM_H */ diff --git a/thirdparty/opus/silk/resampler_structs.h b/thirdparty/opus/silk/resampler_structs.h deleted file mode 100644 index 9e9457d11c..0000000000 --- a/thirdparty/opus/silk/resampler_structs.h +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_RESAMPLER_STRUCTS_H -#define SILK_RESAMPLER_STRUCTS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define SILK_RESAMPLER_MAX_FIR_ORDER 36 -#define SILK_RESAMPLER_MAX_IIR_ORDER 6 - -typedef struct _silk_resampler_state_struct{ - opus_int32 sIIR[ SILK_RESAMPLER_MAX_IIR_ORDER ]; /* this must be the first element of this struct */ - union{ - opus_int32 i32[ SILK_RESAMPLER_MAX_FIR_ORDER ]; - opus_int16 i16[ SILK_RESAMPLER_MAX_FIR_ORDER ]; - } sFIR; - opus_int16 delayBuf[ 48 ]; - opus_int resampler_function; - opus_int batchSize; - opus_int32 invRatio_Q16; - opus_int FIR_Order; - opus_int FIR_Fracs; - opus_int Fs_in_kHz; - opus_int Fs_out_kHz; - opus_int inputDelay; - const opus_int16 *Coefs; -} silk_resampler_state_struct; - -#ifdef __cplusplus -} -#endif -#endif /* SILK_RESAMPLER_STRUCTS_H */ - diff --git a/thirdparty/opus/silk/shell_coder.c b/thirdparty/opus/silk/shell_coder.c deleted file mode 100644 index 4af341474b..0000000000 --- a/thirdparty/opus/silk/shell_coder.c +++ /dev/null @@ -1,151 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* shell coder; pulse-subframe length is hardcoded */ - -static OPUS_INLINE void combine_pulses( - opus_int *out, /* O combined pulses vector [len] */ - const opus_int *in, /* I input vector [2 * len] */ - const opus_int len /* I number of OUTPUT samples */ -) -{ - opus_int k; - for( k = 0; k < len; k++ ) { - out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ]; - } -} - -static OPUS_INLINE void encode_split( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int p_child1, /* I pulse amplitude of first child subframe */ - const opus_int p, /* I pulse amplitude of current subframe */ - const opus_uint8 *shell_table /* I table of shell cdfs */ -) -{ - if( p > 0 ) { - ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 ); - } -} - -static OPUS_INLINE void decode_split( - opus_int16 *p_child1, /* O pulse amplitude of first child subframe */ - opus_int16 *p_child2, /* O pulse amplitude of second child subframe */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - const opus_int p, /* I pulse amplitude of current subframe */ - const opus_uint8 *shell_table /* I table of shell cdfs */ -) -{ - if( p > 0 ) { - p_child1[ 0 ] = ec_dec_icdf( psRangeDec, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 ); - p_child2[ 0 ] = p - p_child1[ 0 ]; - } else { - p_child1[ 0 ] = 0; - p_child2[ 0 ] = 0; - } -} - -/* Shell encoder, operates on one shell code frame of 16 pulses */ -void silk_shell_encoder( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */ -) -{ - opus_int pulses1[ 8 ], pulses2[ 4 ], pulses3[ 2 ], pulses4[ 1 ]; - - /* this function operates on one shell code frame of 16 pulses */ - silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); - - /* tree representation per pulse-subframe */ - combine_pulses( pulses1, pulses0, 8 ); - combine_pulses( pulses2, pulses1, 4 ); - combine_pulses( pulses3, pulses2, 2 ); - combine_pulses( pulses4, pulses3, 1 ); - - encode_split( psRangeEnc, pulses3[ 0 ], pulses4[ 0 ], silk_shell_code_table3 ); - - encode_split( psRangeEnc, pulses2[ 0 ], pulses3[ 0 ], silk_shell_code_table2 ); - - encode_split( psRangeEnc, pulses1[ 0 ], pulses2[ 0 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 0 ], pulses1[ 0 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 2 ], pulses1[ 1 ], silk_shell_code_table0 ); - - encode_split( psRangeEnc, pulses1[ 2 ], pulses2[ 1 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 4 ], pulses1[ 2 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 6 ], pulses1[ 3 ], silk_shell_code_table0 ); - - encode_split( psRangeEnc, pulses2[ 2 ], pulses3[ 1 ], silk_shell_code_table2 ); - - encode_split( psRangeEnc, pulses1[ 4 ], pulses2[ 2 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 8 ], pulses1[ 4 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 10 ], pulses1[ 5 ], silk_shell_code_table0 ); - - encode_split( psRangeEnc, pulses1[ 6 ], pulses2[ 3 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 12 ], pulses1[ 6 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 14 ], pulses1[ 7 ], silk_shell_code_table0 ); -} - - -/* Shell decoder, operates on one shell code frame of 16 pulses */ -void silk_shell_decoder( - opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - const opus_int pulses4 /* I number of pulses per pulse-subframe */ -) -{ - opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; - - /* this function operates on one shell code frame of 16 pulses */ - silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); - - decode_split( &pulses3[ 0 ], &pulses3[ 1 ], psRangeDec, pulses4, silk_shell_code_table3 ); - - decode_split( &pulses2[ 0 ], &pulses2[ 1 ], psRangeDec, pulses3[ 0 ], silk_shell_code_table2 ); - - decode_split( &pulses1[ 0 ], &pulses1[ 1 ], psRangeDec, pulses2[ 0 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 0 ], &pulses0[ 1 ], psRangeDec, pulses1[ 0 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 2 ], &pulses0[ 3 ], psRangeDec, pulses1[ 1 ], silk_shell_code_table0 ); - - decode_split( &pulses1[ 2 ], &pulses1[ 3 ], psRangeDec, pulses2[ 1 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 4 ], &pulses0[ 5 ], psRangeDec, pulses1[ 2 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 6 ], &pulses0[ 7 ], psRangeDec, pulses1[ 3 ], silk_shell_code_table0 ); - - decode_split( &pulses2[ 2 ], &pulses2[ 3 ], psRangeDec, pulses3[ 1 ], silk_shell_code_table2 ); - - decode_split( &pulses1[ 4 ], &pulses1[ 5 ], psRangeDec, pulses2[ 2 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 8 ], &pulses0[ 9 ], psRangeDec, pulses1[ 4 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 10 ], &pulses0[ 11 ], psRangeDec, pulses1[ 5 ], silk_shell_code_table0 ); - - decode_split( &pulses1[ 6 ], &pulses1[ 7 ], psRangeDec, pulses2[ 3 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 12 ], &pulses0[ 13 ], psRangeDec, pulses1[ 6 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 14 ], &pulses0[ 15 ], psRangeDec, pulses1[ 7 ], silk_shell_code_table0 ); -} diff --git a/thirdparty/opus/silk/sigm_Q15.c b/thirdparty/opus/silk/sigm_Q15.c deleted file mode 100644 index 3c507d255b..0000000000 --- a/thirdparty/opus/silk/sigm_Q15.c +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Approximate sigmoid function */ - -#include "SigProc_FIX.h" - -/* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */ -static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = { - 237, 153, 73, 30, 12, 7 -}; -/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp(-(0:5))))); */ -static const opus_int32 sigm_LUT_pos_Q15[ 6 ] = { - 16384, 23955, 28861, 31213, 32178, 32548 -}; -/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp((0:5))))); */ -static const opus_int32 sigm_LUT_neg_Q15[ 6 ] = { - 16384, 8812, 3906, 1554, 589, 219 -}; - -opus_int silk_sigm_Q15( - opus_int in_Q5 /* I */ -) -{ - opus_int ind; - - if( in_Q5 < 0 ) { - /* Negative input */ - in_Q5 = -in_Q5; - if( in_Q5 >= 6 * 32 ) { - return 0; /* Clip */ - } else { - /* Linear interpolation of look up table */ - ind = silk_RSHIFT( in_Q5, 5 ); - return( sigm_LUT_neg_Q15[ ind ] - silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) ); - } - } else { - /* Positive input */ - if( in_Q5 >= 6 * 32 ) { - return 32767; /* clip */ - } else { - /* Linear interpolation of look up table */ - ind = silk_RSHIFT( in_Q5, 5 ); - return( sigm_LUT_pos_Q15[ ind ] + silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) ); - } - } -} - diff --git a/thirdparty/opus/silk/sort.c b/thirdparty/opus/silk/sort.c deleted file mode 100644 index 7187c9efb1..0000000000 --- a/thirdparty/opus/silk/sort.c +++ /dev/null @@ -1,154 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Insertion sort (fast for already almost sorted arrays): */ -/* Best case: O(n) for an already sorted array */ -/* Worst case: O(n^2) for an inversely sorted array */ -/* */ -/* Shell short: https://en.wikipedia.org/wiki/Shell_sort */ - -#include "SigProc_FIX.h" - -void silk_insertion_sort_increasing( - opus_int32 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -) -{ - opus_int32 value; - opus_int i, j; - - /* Safety checks */ - silk_assert( K > 0 ); - silk_assert( L > 0 ); - silk_assert( L >= K ); - - /* Write start indices in index vector */ - for( i = 0; i < K; i++ ) { - idx[ i ] = i; - } - - /* Sort vector elements by value, increasing order */ - for( i = 1; i < K; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - - /* If less than L values are asked for, check the remaining values, */ - /* but only spend CPU to ensure that the K first values are correct */ - for( i = K; i < L; i++ ) { - value = a[ i ]; - if( value < a[ K - 1 ] ) { - for( j = K - 2; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - } -} - -#ifdef FIXED_POINT -/* This function is only used by the fixed-point build */ -void silk_insertion_sort_decreasing_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -) -{ - opus_int i, j; - opus_int value; - - /* Safety checks */ - silk_assert( K > 0 ); - silk_assert( L > 0 ); - silk_assert( L >= K ); - - /* Write start indices in index vector */ - for( i = 0; i < K; i++ ) { - idx[ i ] = i; - } - - /* Sort vector elements by value, decreasing order */ - for( i = 1; i < K; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - - /* If less than L values are asked for, check the remaining values, */ - /* but only spend CPU to ensure that the K first values are correct */ - for( i = K; i < L; i++ ) { - value = a[ i ]; - if( value > a[ K - 1 ] ) { - for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - } -} -#endif - -void silk_insertion_sort_increasing_all_values_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - const opus_int L /* I Vector length */ -) -{ - opus_int value; - opus_int i, j; - - /* Safety checks */ - silk_assert( L > 0 ); - - /* Sort vector elements by value, increasing order */ - for( i = 1; i < L; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - } - a[ j + 1 ] = value; /* Write value */ - } -} diff --git a/thirdparty/opus/silk/stereo_LR_to_MS.c b/thirdparty/opus/silk/stereo_LR_to_MS.c deleted file mode 100644 index dda0298de2..0000000000 --- a/thirdparty/opus/silk/stereo_LR_to_MS.c +++ /dev/null @@ -1,229 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/* Convert Left/Right stereo signal to adaptive Mid/Side representation */ -void silk_stereo_LR_to_MS( - stereo_enc_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */ - opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */ - opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ - opus_int32 total_rate_bps, /* I Total bitrate */ - opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ - opus_int toMono, /* I Last frame before a stereo->mono transition */ - opus_int fs_kHz, /* I Sample rate (kHz) */ - opus_int frame_length /* I Number of samples */ -) -{ - opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13; - opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13; - opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24; - VARDECL( opus_int16, side ); - VARDECL( opus_int16, LP_mid ); - VARDECL( opus_int16, HP_mid ); - VARDECL( opus_int16, LP_side ); - VARDECL( opus_int16, HP_side ); - opus_int16 *mid = &x1[ -2 ]; - SAVE_STACK; - - ALLOC( side, frame_length + 2, opus_int16 ); - /* Convert to basic mid/side signals */ - for( n = 0; n < frame_length + 2; n++ ) { - sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ]; - diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ]; - mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); - side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) ); - } - - /* Buffering */ - silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) ); - - /* LP and HP filter mid signal */ - ALLOC( LP_mid, frame_length, opus_int16 ); - ALLOC( HP_mid, frame_length, opus_int16 ); - for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); - LP_mid[ n ] = sum; - HP_mid[ n ] = mid[ n + 1 ] - sum; - } - - /* LP and HP filter side signal */ - ALLOC( LP_side, frame_length, opus_int16 ); - ALLOC( HP_side, frame_length, opus_int16 ); - for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); - LP_side[ n ] = sum; - HP_side[ n ] = side[ n + 1 ] - sum; - } - - /* Find energies and predictors */ - is10msFrame = frame_length == 10 * fs_kHz; - smooth_coef_Q16 = is10msFrame ? - SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) : - SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 ); - smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 ); - - pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 ); - pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 ); - /* Ratio of the norms of residual and mid signals */ - frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 ); - frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) ); - - /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */ - total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */ - if( total_rate_bps < 1 ) { - total_rate_bps = 1; - } - min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 ); - silk_assert( min_mid_rate_bps < 32767 ); - /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */ - frac_3_Q16 = silk_MUL( 3, frac_Q16 ); - mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 ); - /* If Mid bitrate below minimum, reduce stereo width */ - if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) { - mid_side_rates_bps[ 0 ] = min_mid_rate_bps; - mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ]; - /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */ - width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps, - silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 ); - width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) ); - } else { - mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ]; - width_Q14 = SILK_FIX_CONST( 1, 14 ); - } - - /* Smoother */ - state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 ); - - /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */ - *mid_only_flag = 0; - if( toMono ) { - /* Last frame before stereo->mono transition; collapse stereo width */ - width_Q14 = 0; - pred_Q13[ 0 ] = 0; - pred_Q13[ 1 ] = 0; - silk_stereo_quant_pred( pred_Q13, ix ); - } else if( state->width_prev_Q14 == 0 && - ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) ) - { - /* Code as panned-mono; previous frame already had zero width */ - /* Scale down and quantize predictors */ - pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); - pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); - silk_stereo_quant_pred( pred_Q13, ix ); - /* Collapse stereo width */ - width_Q14 = 0; - pred_Q13[ 0 ] = 0; - pred_Q13[ 1 ] = 0; - mid_side_rates_bps[ 0 ] = total_rate_bps; - mid_side_rates_bps[ 1 ] = 0; - *mid_only_flag = 1; - } else if( state->width_prev_Q14 != 0 && - ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) ) - { - /* Transition to zero-width stereo */ - /* Scale down and quantize predictors */ - pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); - pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); - silk_stereo_quant_pred( pred_Q13, ix ); - /* Collapse stereo width */ - width_Q14 = 0; - pred_Q13[ 0 ] = 0; - pred_Q13[ 1 ] = 0; - } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) { - /* Full-width stereo coding */ - silk_stereo_quant_pred( pred_Q13, ix ); - width_Q14 = SILK_FIX_CONST( 1, 14 ); - } else { - /* Reduced-width stereo coding; scale down and quantize predictors */ - pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); - pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); - silk_stereo_quant_pred( pred_Q13, ix ); - width_Q14 = state->smth_width_Q14; - } - - /* Make sure to keep on encoding until the tapered output has been transmitted */ - if( *mid_only_flag == 1 ) { - state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz; - if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) { - *mid_only_flag = 0; - } else { - /* Limit to avoid wrapping around */ - state->silent_side_len = 10000; - } - } else { - state->silent_side_len = 0; - } - - if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) { - mid_side_rates_bps[ 1 ] = 1; - mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]); - } - - /* Interpolate predictors and subtract prediction from side channel */ - pred0_Q13 = -state->pred_prev_Q13[ 0 ]; - pred1_Q13 = -state->pred_prev_Q13[ 1 ]; - w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 ); - denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz ); - delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); - delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); - deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 ); - for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { - pred0_Q13 += delta0_Q13; - pred1_Q13 += delta1_Q13; - w_Q24 += deltaw_Q24; - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - - pred0_Q13 = -pred_Q13[ 0 ]; - pred1_Q13 = -pred_Q13[ 1 ]; - w_Q24 = silk_LSHIFT( width_Q14, 10 ); - for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ]; - state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ]; - state->width_prev_Q14 = (opus_int16)width_Q14; - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/stereo_MS_to_LR.c b/thirdparty/opus/silk/stereo_MS_to_LR.c deleted file mode 100644 index 62521a4f35..0000000000 --- a/thirdparty/opus/silk/stereo_MS_to_LR.c +++ /dev/null @@ -1,85 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ -void silk_stereo_MS_to_LR( - stereo_dec_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - const opus_int32 pred_Q13[], /* I Predictors */ - opus_int fs_kHz, /* I Samples rate (kHz) */ - opus_int frame_length /* I Number of samples */ -) -{ - opus_int n, denom_Q16, delta0_Q13, delta1_Q13; - opus_int32 sum, diff, pred0_Q13, pred1_Q13; - - /* Buffering */ - silk_memcpy( x1, state->sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sMid, &x1[ frame_length ], 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) ); - - /* Interpolate predictors and add prediction to side channel */ - pred0_Q13 = state->pred_prev_Q13[ 0 ]; - pred1_Q13 = state->pred_prev_Q13[ 1 ]; - denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz ); - delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); - delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); - for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { - pred0_Q13 += delta0_Q13; - pred1_Q13 += delta1_Q13; - sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - pred0_Q13 = pred_Q13[ 0 ]; - pred1_Q13 = pred_Q13[ 1 ]; - for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ]; - state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ]; - - /* Convert to left/right signals */ - for( n = 0; n < frame_length; n++ ) { - sum = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ]; - diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ]; - x1[ n + 1 ] = (opus_int16)silk_SAT16( sum ); - x2[ n + 1 ] = (opus_int16)silk_SAT16( diff ); - } -} diff --git a/thirdparty/opus/silk/stereo_decode_pred.c b/thirdparty/opus/silk/stereo_decode_pred.c deleted file mode 100644 index 56ba3925e8..0000000000 --- a/thirdparty/opus/silk/stereo_decode_pred.c +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Decode mid/side predictors */ -void silk_stereo_decode_pred( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int32 pred_Q13[] /* O Predictors */ -) -{ - opus_int n, ix[ 2 ][ 3 ]; - opus_int32 low_Q13, step_Q13; - - /* Entropy decoding */ - n = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 ); - ix[ 0 ][ 2 ] = silk_DIV32_16( n, 5 ); - ix[ 1 ][ 2 ] = n - 5 * ix[ 0 ][ 2 ]; - for( n = 0; n < 2; n++ ) { - ix[ n ][ 0 ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 ); - ix[ n ][ 1 ] = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 ); - } - - /* Dequantize */ - for( n = 0; n < 2; n++ ) { - ix[ n ][ 0 ] += 3 * ix[ n ][ 2 ]; - low_Q13 = silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] ]; - step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] + 1 ] - low_Q13, - SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); - pred_Q13[ n ] = silk_SMLABB( low_Q13, step_Q13, 2 * ix[ n ][ 1 ] + 1 ); - } - - /* Subtract second from first predictor (helps when actually applying these) */ - pred_Q13[ 0 ] -= pred_Q13[ 1 ]; -} - -/* Decode mid-only flag */ -void silk_stereo_decode_mid_only( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int *decode_only_mid /* O Flag that only mid channel has been coded */ -) -{ - /* Decode flag that only mid channel is coded */ - *decode_only_mid = ec_dec_icdf( psRangeDec, silk_stereo_only_code_mid_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/stereo_encode_pred.c b/thirdparty/opus/silk/stereo_encode_pred.c deleted file mode 100644 index e6dd195066..0000000000 --- a/thirdparty/opus/silk/stereo_encode_pred.c +++ /dev/null @@ -1,62 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Entropy code the mid/side quantization indices */ -void silk_stereo_encode_pred( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */ -) -{ - opus_int n; - - /* Entropy coding */ - n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ]; - silk_assert( n < 25 ); - ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 ); - for( n = 0; n < 2; n++ ) { - silk_assert( ix[ n ][ 0 ] < 3 ); - silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS ); - ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 ); - ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 ); - } -} - -/* Entropy code the mid-only flag */ -void silk_stereo_encode_mid_only( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 mid_only_flag -) -{ - /* Encode flag that only mid channel is coded */ - ec_enc_icdf( psRangeEnc, mid_only_flag, silk_stereo_only_code_mid_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/stereo_find_predictor.c b/thirdparty/opus/silk/stereo_find_predictor.c deleted file mode 100644 index e30e90bddc..0000000000 --- a/thirdparty/opus/silk/stereo_find_predictor.c +++ /dev/null @@ -1,79 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Find least-squares prediction gain for one signal based on another and quantize it */ -opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */ - opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */ - const opus_int16 x[], /* I Basis signal */ - const opus_int16 y[], /* I Target signal */ - opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */ - opus_int length, /* I Number of samples */ - opus_int smooth_coef_Q16 /* I Smoothing coefficient */ -) -{ - opus_int scale, scale1, scale2; - opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10; - - /* Find predictor */ - silk_sum_sqr_shift( &nrgx, &scale1, x, length ); - silk_sum_sqr_shift( &nrgy, &scale2, y, length ); - scale = silk_max_int( scale1, scale2 ); - scale = scale + ( scale & 1 ); /* make even */ - nrgy = silk_RSHIFT32( nrgy, scale - scale2 ); - nrgx = silk_RSHIFT32( nrgx, scale - scale1 ); - nrgx = silk_max_int( nrgx, 1 ); - corr = silk_inner_prod_aligned_scale( x, y, scale, length ); - pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 ); - pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 ); - pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 ); - - /* Faster update for signals with large prediction parameters */ - smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) ); - - /* Smoothed mid and residual norms */ - silk_assert( smooth_coef_Q16 < 32768 ); - scale = silk_RSHIFT( scale, 1 ); - mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ], - smooth_coef_Q16 ); - /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */ - nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 ); - nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 ); - mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ], - smooth_coef_Q16 ); - - /* Ratio of smoothed residual and mid norms */ - *ratio_Q14 = silk_DIV32_varQ( mid_res_amp_Q0[ 1 ], silk_max( mid_res_amp_Q0[ 0 ], 1 ), 14 ); - *ratio_Q14 = silk_LIMIT( *ratio_Q14, 0, 32767 ); - - return pred_Q13; -} diff --git a/thirdparty/opus/silk/stereo_quant_pred.c b/thirdparty/opus/silk/stereo_quant_pred.c deleted file mode 100644 index d4ced6c3e8..0000000000 --- a/thirdparty/opus/silk/stereo_quant_pred.c +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Quantize mid/side predictors */ -void silk_stereo_quant_pred( - opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */ - opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */ -) -{ - opus_int i, j, n; - opus_int32 low_Q13, step_Q13, lvl_Q13, err_min_Q13, err_Q13, quant_pred_Q13 = 0; - - /* Quantize */ - for( n = 0; n < 2; n++ ) { - /* Brute-force search over quantization levels */ - err_min_Q13 = silk_int32_MAX; - for( i = 0; i < STEREO_QUANT_TAB_SIZE - 1; i++ ) { - low_Q13 = silk_stereo_pred_quant_Q13[ i ]; - step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ i + 1 ] - low_Q13, - SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); - for( j = 0; j < STEREO_QUANT_SUB_STEPS; j++ ) { - lvl_Q13 = silk_SMLABB( low_Q13, step_Q13, 2 * j + 1 ); - err_Q13 = silk_abs( pred_Q13[ n ] - lvl_Q13 ); - if( err_Q13 < err_min_Q13 ) { - err_min_Q13 = err_Q13; - quant_pred_Q13 = lvl_Q13; - ix[ n ][ 0 ] = i; - ix[ n ][ 1 ] = j; - } else { - /* Error increasing, so we're past the optimum */ - goto done; - } - } - } - done: - ix[ n ][ 2 ] = silk_DIV32_16( ix[ n ][ 0 ], 3 ); - ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3; - pred_Q13[ n ] = quant_pred_Q13; - } - - /* Subtract second from first predictor (helps when actually applying these) */ - pred_Q13[ 0 ] -= pred_Q13[ 1 ]; -} diff --git a/thirdparty/opus/silk/structs.h b/thirdparty/opus/silk/structs.h deleted file mode 100644 index 827829dc6f..0000000000 --- a/thirdparty/opus/silk/structs.h +++ /dev/null @@ -1,327 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_STRUCTS_H -#define SILK_STRUCTS_H - -#include "typedef.h" -#include "SigProc_FIX.h" -#include "define.h" -#include "entenc.h" -#include "entdec.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/************************************/ -/* Noise shaping quantization state */ -/************************************/ -typedef struct { - opus_int16 xq[ 2 * MAX_FRAME_LENGTH ]; /* Buffer for quantized output signal */ - opus_int32 sLTP_shp_Q14[ 2 * MAX_FRAME_LENGTH ]; - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 sLF_AR_shp_Q14; - opus_int lagPrev; - opus_int sLTP_buf_idx; - opus_int sLTP_shp_buf_idx; - opus_int32 rand_seed; - opus_int32 prev_gain_Q16; - opus_int rewhite_flag; -} silk_nsq_state; - -/********************************/ -/* VAD state */ -/********************************/ -typedef struct { - opus_int32 AnaState[ 2 ]; /* Analysis filterbank state: 0-8 kHz */ - opus_int32 AnaState1[ 2 ]; /* Analysis filterbank state: 0-4 kHz */ - opus_int32 AnaState2[ 2 ]; /* Analysis filterbank state: 0-2 kHz */ - opus_int32 XnrgSubfr[ VAD_N_BANDS ]; /* Subframe energies */ - opus_int32 NrgRatioSmth_Q8[ VAD_N_BANDS ]; /* Smoothed energy level in each band */ - opus_int16 HPstate; /* State of differentiator in the lowest band */ - opus_int32 NL[ VAD_N_BANDS ]; /* Noise energy level in each band */ - opus_int32 inv_NL[ VAD_N_BANDS ]; /* Inverse noise energy level in each band */ - opus_int32 NoiseLevelBias[ VAD_N_BANDS ]; /* Noise level estimator bias/offset */ - opus_int32 counter; /* Frame counter used in the initial phase */ -} silk_VAD_state; - -/* Variable cut-off low-pass filter state */ -typedef struct { - opus_int32 In_LP_State[ 2 ]; /* Low pass filter state */ - opus_int32 transition_frame_no; /* Counter which is mapped to a cut-off frequency */ - opus_int mode; /* Operating mode, <0: switch down, >0: switch up; 0: do nothing */ -} silk_LP_state; - -/* Structure containing NLSF codebook */ -typedef struct { - const opus_int16 nVectors; - const opus_int16 order; - const opus_int16 quantStepSize_Q16; - const opus_int16 invQuantStepSize_Q6; - const opus_uint8 *CB1_NLSF_Q8; - const opus_uint8 *CB1_iCDF; - const opus_uint8 *pred_Q8; - const opus_uint8 *ec_sel; - const opus_uint8 *ec_iCDF; - const opus_uint8 *ec_Rates_Q5; - const opus_int16 *deltaMin_Q15; -} silk_NLSF_CB_struct; - -typedef struct { - opus_int16 pred_prev_Q13[ 2 ]; - opus_int16 sMid[ 2 ]; - opus_int16 sSide[ 2 ]; - opus_int32 mid_side_amp_Q0[ 4 ]; - opus_int16 smth_width_Q14; - opus_int16 width_prev_Q14; - opus_int16 silent_side_len; - opus_int8 predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ]; - opus_int8 mid_only_flags[ MAX_FRAMES_PER_PACKET ]; -} stereo_enc_state; - -typedef struct { - opus_int16 pred_prev_Q13[ 2 ]; - opus_int16 sMid[ 2 ]; - opus_int16 sSide[ 2 ]; -} stereo_dec_state; - -typedef struct { - opus_int8 GainsIndices[ MAX_NB_SUBFR ]; - opus_int8 LTPIndex[ MAX_NB_SUBFR ]; - opus_int8 NLSFIndices[ MAX_LPC_ORDER + 1 ]; - opus_int16 lagIndex; - opus_int8 contourIndex; - opus_int8 signalType; - opus_int8 quantOffsetType; - opus_int8 NLSFInterpCoef_Q2; - opus_int8 PERIndex; - opus_int8 LTP_scaleIndex; - opus_int8 Seed; -} SideInfoIndices; - -/********************************/ -/* Encoder state */ -/********************************/ -typedef struct { - opus_int32 In_HP_State[ 2 ]; /* High pass filter state */ - opus_int32 variable_HP_smth1_Q15; /* State of first smoother */ - opus_int32 variable_HP_smth2_Q15; /* State of second smoother */ - silk_LP_state sLP; /* Low pass filter state */ - silk_VAD_state sVAD; /* Voice activity detector state */ - silk_nsq_state sNSQ; /* Noise Shape Quantizer State */ - opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ]; /* Previously quantized NLSF vector */ - opus_int speech_activity_Q8; /* Speech activity */ - opus_int allow_bandwidth_switch; /* Flag indicating that switching of internal bandwidth is allowed */ - opus_int8 LBRRprevLastGainIndex; - opus_int8 prevSignalType; - opus_int prevLag; - opus_int pitch_LPC_win_length; - opus_int max_pitch_lag; /* Highest possible pitch lag (samples) */ - opus_int32 API_fs_Hz; /* API sampling frequency (Hz) */ - opus_int32 prev_API_fs_Hz; /* Previous API sampling frequency (Hz) */ - opus_int maxInternal_fs_Hz; /* Maximum internal sampling frequency (Hz) */ - opus_int minInternal_fs_Hz; /* Minimum internal sampling frequency (Hz) */ - opus_int desiredInternal_fs_Hz; /* Soft request for internal sampling frequency (Hz) */ - opus_int fs_kHz; /* Internal sampling frequency (kHz) */ - opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ - opus_int frame_length; /* Frame length (samples) */ - opus_int subfr_length; /* Subframe length (samples) */ - opus_int ltp_mem_length; /* Length of LTP memory */ - opus_int la_pitch; /* Look-ahead for pitch analysis (samples) */ - opus_int la_shape; /* Look-ahead for noise shape analysis (samples) */ - opus_int shapeWinLength; /* Window length for noise shape analysis (samples) */ - opus_int32 TargetRate_bps; /* Target bitrate (bps) */ - opus_int PacketSize_ms; /* Number of milliseconds to put in each packet */ - opus_int PacketLoss_perc; /* Packet loss rate measured by farend */ - opus_int32 frameCounter; - opus_int Complexity; /* Complexity setting */ - opus_int nStatesDelayedDecision; /* Number of states in delayed decision quantization */ - opus_int useInterpolatedNLSFs; /* Flag for using NLSF interpolation */ - opus_int shapingLPCOrder; /* Filter order for noise shaping filters */ - opus_int predictLPCOrder; /* Filter order for prediction filters */ - opus_int pitchEstimationComplexity; /* Complexity level for pitch estimator */ - opus_int pitchEstimationLPCOrder; /* Whitening filter order for pitch estimator */ - opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */ - opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */ - opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */ - opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */ - opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */ - opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */ - opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */ - opus_int warping_Q16; /* Warping parameter for warped noise shaping */ - opus_int useCBR; /* Flag to enable constant bitrate */ - opus_int prefillFlag; /* Flag to indicate that only buffers are prefilled, no coding */ - const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ - const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ - const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */ - opus_int input_quality_bands_Q15[ VAD_N_BANDS ]; - opus_int input_tilt_Q15; - opus_int SNR_dB_Q7; /* Quality setting */ - - opus_int8 VAD_flags[ MAX_FRAMES_PER_PACKET ]; - opus_int8 LBRR_flag; - opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ]; - - SideInfoIndices indices; - opus_int8 pulses[ MAX_FRAME_LENGTH ]; - - int arch; - - /* Input/output buffering */ - opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */ - opus_int inputBufIx; - opus_int nFramesPerPacket; - opus_int nFramesEncoded; /* Number of frames analyzed in current packet */ - - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int channelNb; - - /* Parameters For LTP scaling Control */ - opus_int frames_since_onset; - - /* Specifically for entropy coding */ - opus_int ec_prevSignalType; - opus_int16 ec_prevLagIndex; - - silk_resampler_state_struct resampler_state; - - /* DTX */ - opus_int useDTX; /* Flag to enable DTX */ - opus_int inDTX; /* Flag to signal DTX period */ - opus_int noSpeechCounter; /* Counts concecutive nonactive frames, used by DTX */ - - /* Inband Low Bitrate Redundancy (LBRR) data */ - opus_int useInBandFEC; /* Saves the API setting for query */ - opus_int LBRR_enabled; /* Depends on useInBandFRC, bitrate and packet loss rate */ - opus_int LBRR_GainIncreases; /* Gains increment for coding LBRR frames */ - SideInfoIndices indices_LBRR[ MAX_FRAMES_PER_PACKET ]; - opus_int8 pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ]; -} silk_encoder_state; - - -/* Struct for Packet Loss Concealment */ -typedef struct { - opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */ - opus_int16 LTPCoef_Q14[ LTP_ORDER ]; /* LTP coeficients to use for voiced concealment */ - opus_int16 prevLPC_Q12[ MAX_LPC_ORDER ]; - opus_int last_frame_lost; /* Was previous frame lost */ - opus_int32 rand_seed; /* Seed for unvoiced signal generation */ - opus_int16 randScale_Q14; /* Scaling of unvoiced random signal */ - opus_int32 conc_energy; - opus_int conc_energy_shift; - opus_int16 prevLTP_scale_Q14; - opus_int32 prevGain_Q16[ 2 ]; - opus_int fs_kHz; - opus_int nb_subfr; - opus_int subfr_length; -} silk_PLC_struct; - -/* Struct for CNG */ -typedef struct { - opus_int32 CNG_exc_buf_Q14[ MAX_FRAME_LENGTH ]; - opus_int16 CNG_smth_NLSF_Q15[ MAX_LPC_ORDER ]; - opus_int32 CNG_synth_state[ MAX_LPC_ORDER ]; - opus_int32 CNG_smth_Gain_Q16; - opus_int32 rand_seed; - opus_int fs_kHz; -} silk_CNG_struct; - -/********************************/ -/* Decoder state */ -/********************************/ -typedef struct { - opus_int32 prev_gain_Q16; - opus_int32 exc_Q14[ MAX_FRAME_LENGTH ]; - opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ]; - opus_int16 outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ]; /* Buffer for output signal */ - opus_int lagPrev; /* Previous Lag */ - opus_int8 LastGainIndex; /* Previous gain index */ - opus_int fs_kHz; /* Sampling frequency in kHz */ - opus_int32 fs_API_hz; /* API sample frequency (Hz) */ - opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ - opus_int frame_length; /* Frame length (samples) */ - opus_int subfr_length; /* Subframe length (samples) */ - opus_int ltp_mem_length; /* Length of LTP memory */ - opus_int LPC_order; /* LPC order */ - opus_int16 prevNLSF_Q15[ MAX_LPC_ORDER ]; /* Used to interpolate LSFs */ - opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation */ - const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ - const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ - - /* For buffering payload in case of more frames per packet */ - opus_int nFramesDecoded; - opus_int nFramesPerPacket; - - /* Specifically for entropy coding */ - opus_int ec_prevSignalType; - opus_int16 ec_prevLagIndex; - - opus_int VAD_flags[ MAX_FRAMES_PER_PACKET ]; - opus_int LBRR_flag; - opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ]; - - silk_resampler_state_struct resampler_state; - - const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */ - - /* Quantization indices */ - SideInfoIndices indices; - - /* CNG state */ - silk_CNG_struct sCNG; - - /* Stuff used for PLC */ - opus_int lossCnt; - opus_int prevSignalType; - - silk_PLC_struct sPLC; - -} silk_decoder_state; - -/************************/ -/* Decoder control */ -/************************/ -typedef struct { - /* Prediction and coding parameters */ - opus_int pitchL[ MAX_NB_SUBFR ]; - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - /* Holds interpolated and final coefficients, 4-byte aligned */ - silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; - opus_int LTP_scale_Q14; -} silk_decoder_control; - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/sum_sqr_shift.c b/thirdparty/opus/silk/sum_sqr_shift.c deleted file mode 100644 index 129df191d8..0000000000 --- a/thirdparty/opus/silk/sum_sqr_shift.c +++ /dev/null @@ -1,86 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Compute number of bits to right shift the sum of squares of a vector */ -/* of int16s to make it fit in an int32 */ -void silk_sum_sqr_shift( - opus_int32 *energy, /* O Energy of x, after shifting to the right */ - opus_int *shift, /* O Number of bits right shift applied to energy */ - const opus_int16 *x, /* I Input vector */ - opus_int len /* I Length of input vector */ -) -{ - opus_int i, shft; - opus_int32 nrg_tmp, nrg; - - nrg = 0; - shft = 0; - len--; - for( i = 0; i < len; i += 2 ) { - nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] ); - nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] ); - if( nrg < 0 ) { - /* Scale down */ - nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); - shft = 2; - i+=2; - break; - } - } - for( ; i < len; i += 2 ) { - nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); - nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft ); - if( nrg < 0 ) { - /* Scale down */ - nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); - shft += 2; - } - } - if( i == len ) { - /* One sample left to process */ - nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); - } - - /* Make sure to have at least one extra leading zero (two leading zeros in total) */ - if( nrg & 0xC0000000 ) { - nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); - shft += 2; - } - - /* Output arguments */ - *shift = shft; - *energy = nrg; -} - diff --git a/thirdparty/opus/silk/table_LSF_cos.c b/thirdparty/opus/silk/table_LSF_cos.c deleted file mode 100644 index ec9dc63927..0000000000 --- a/thirdparty/opus/silk/table_LSF_cos.c +++ /dev/null @@ -1,70 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -/* Cosine approximation table for LSF conversion */ -/* Q12 values (even) */ -const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ] = { - 8192, 8190, 8182, 8170, - 8152, 8130, 8104, 8072, - 8034, 7994, 7946, 7896, - 7840, 7778, 7714, 7644, - 7568, 7490, 7406, 7318, - 7226, 7128, 7026, 6922, - 6812, 6698, 6580, 6458, - 6332, 6204, 6070, 5934, - 5792, 5648, 5502, 5352, - 5198, 5040, 4880, 4718, - 4552, 4382, 4212, 4038, - 3862, 3684, 3502, 3320, - 3136, 2948, 2760, 2570, - 2378, 2186, 1990, 1794, - 1598, 1400, 1202, 1002, - 802, 602, 402, 202, - 0, -202, -402, -602, - -802, -1002, -1202, -1400, - -1598, -1794, -1990, -2186, - -2378, -2570, -2760, -2948, - -3136, -3320, -3502, -3684, - -3862, -4038, -4212, -4382, - -4552, -4718, -4880, -5040, - -5198, -5352, -5502, -5648, - -5792, -5934, -6070, -6204, - -6332, -6458, -6580, -6698, - -6812, -6922, -7026, -7128, - -7226, -7318, -7406, -7490, - -7568, -7644, -7714, -7778, - -7840, -7896, -7946, -7994, - -8034, -8072, -8104, -8130, - -8152, -8170, -8182, -8190, - -8192 -}; diff --git a/thirdparty/opus/silk/tables.h b/thirdparty/opus/silk/tables.h deleted file mode 100644 index 7fea6fda39..0000000000 --- a/thirdparty/opus/silk/tables.h +++ /dev/null @@ -1,122 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_TABLES_H -#define SILK_TABLES_H - -#include "define.h" -#include "structs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Entropy coding tables (with size in bytes indicated) */ -extern const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ]; /* 24 */ -extern const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ]; /* 41 */ - -extern const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ];/* 32 */ -extern const opus_uint8 silk_pitch_delta_iCDF[ 21 ]; /* 21 */ -extern const opus_uint8 silk_pitch_contour_iCDF[ 34 ]; /* 34 */ -extern const opus_uint8 silk_pitch_contour_NB_iCDF[ 11 ]; /* 11 */ -extern const opus_uint8 silk_pitch_contour_10_ms_iCDF[ 12 ]; /* 12 */ -extern const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[ 3 ]; /* 3 */ - -extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ SILK_MAX_PULSES + 2 ]; /* 180 */ -extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ SILK_MAX_PULSES + 2 ]; /* 162 */ - -extern const opus_uint8 silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ -extern const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ - -extern const opus_uint8 silk_max_pulses_table[ 4 ]; /* 4 */ - -extern const opus_uint8 silk_shell_code_table0[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table1[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table2[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table3[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table_offsets[ SILK_MAX_PULSES + 1 ]; /* 17 */ - -extern const opus_uint8 silk_lsb_iCDF[ 2 ]; /* 2 */ - -extern const opus_uint8 silk_sign_iCDF[ 42 ]; /* 42 */ - -extern const opus_uint8 silk_uniform3_iCDF[ 3 ]; /* 3 */ -extern const opus_uint8 silk_uniform4_iCDF[ 4 ]; /* 4 */ -extern const opus_uint8 silk_uniform5_iCDF[ 5 ]; /* 5 */ -extern const opus_uint8 silk_uniform6_iCDF[ 6 ]; /* 6 */ -extern const opus_uint8 silk_uniform8_iCDF[ 8 ]; /* 8 */ - -extern const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ]; /* 7 */ - -extern const opus_uint8 silk_LTP_per_index_iCDF[ 3 ]; /* 3 */ -extern const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ]; /* 3 */ -extern const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ]; /* 3 */ -extern const opus_int16 silk_LTP_gain_middle_avg_RD_Q14; -extern const opus_int8 * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ]; /* 168 */ -extern const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS]; - -extern const opus_int8 silk_LTP_vq_sizes[ NB_LTP_CBKS ]; /* 3 */ - -extern const opus_uint8 silk_LTPscale_iCDF[ 3 ]; /* 4 */ -extern const opus_int16 silk_LTPScales_table_Q14[ 3 ]; /* 6 */ - -extern const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ]; /* 4 */ -extern const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ]; /* 2 */ - -extern const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ]; /* 32 */ -extern const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ]; /* 25 */ -extern const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ]; /* 2 */ - -extern const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ]; /* 10 */ - -extern const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ]; /* 5 */ - -extern const silk_NLSF_CB_struct silk_NLSF_CB_WB; /* 1040 */ -extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB; /* 728 */ - -/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ -extern const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ]; /* 32 */ -extern const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ]; /* 32 */ -extern const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ]; /* 32 */ -extern const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ]; /* 32 */ - -/* Quantization offsets */ -extern const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ]; /* 8 */ - -/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ -extern const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ]; /* 60 */ -extern const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ]; /* 60 */ - -/* Rom table with cosine values */ -extern const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ]; /* 258 */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/tables_LTP.c b/thirdparty/opus/silk/tables_LTP.c deleted file mode 100644 index 0e6a0254d5..0000000000 --- a/thirdparty/opus/silk/tables_LTP.c +++ /dev/null @@ -1,296 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -const opus_uint8 silk_LTP_per_index_iCDF[3] = { - 179, 99, 0 -}; - -static const opus_uint8 silk_LTP_gain_iCDF_0[8] = { - 71, 56, 43, 30, 21, 12, 6, 0 -}; - -static const opus_uint8 silk_LTP_gain_iCDF_1[16] = { - 199, 165, 144, 124, 109, 96, 84, 71, - 61, 51, 42, 32, 23, 15, 8, 0 -}; - -static const opus_uint8 silk_LTP_gain_iCDF_2[32] = { - 241, 225, 211, 199, 187, 175, 164, 153, - 142, 132, 123, 114, 105, 96, 88, 80, - 72, 64, 57, 50, 44, 38, 33, 29, - 24, 20, 16, 12, 9, 5, 2, 0 -}; - -const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304; - -static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = { - 15, 131, 138, 138, 155, 155, 173, 173 -}; - -static const opus_uint8 silk_LTP_gain_BITS_Q5_1[16] = { - 69, 93, 115, 118, 131, 138, 141, 138, - 150, 150, 155, 150, 155, 160, 166, 160 -}; - -static const opus_uint8 silk_LTP_gain_BITS_Q5_2[32] = { - 131, 128, 134, 141, 141, 141, 145, 145, - 145, 150, 155, 155, 155, 155, 160, 160, - 160, 160, 166, 166, 173, 173, 182, 192, - 182, 192, 192, 192, 205, 192, 205, 224 -}; - -const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[NB_LTP_CBKS] = { - silk_LTP_gain_iCDF_0, - silk_LTP_gain_iCDF_1, - silk_LTP_gain_iCDF_2 -}; - -const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[NB_LTP_CBKS] = { - silk_LTP_gain_BITS_Q5_0, - silk_LTP_gain_BITS_Q5_1, - silk_LTP_gain_BITS_Q5_2 -}; - -static const opus_int8 silk_LTP_gain_vq_0[8][5] = -{ -{ - 4, 6, 24, 7, 5 -}, -{ - 0, 0, 2, 0, 0 -}, -{ - 12, 28, 41, 13, -4 -}, -{ - -9, 15, 42, 25, 14 -}, -{ - 1, -2, 62, 41, -9 -}, -{ - -10, 37, 65, -4, 3 -}, -{ - -6, 4, 66, 7, -8 -}, -{ - 16, 14, 38, -3, 33 -} -}; - -static const opus_int8 silk_LTP_gain_vq_1[16][5] = -{ -{ - 13, 22, 39, 23, 12 -}, -{ - -1, 36, 64, 27, -6 -}, -{ - -7, 10, 55, 43, 17 -}, -{ - 1, 1, 8, 1, 1 -}, -{ - 6, -11, 74, 53, -9 -}, -{ - -12, 55, 76, -12, 8 -}, -{ - -3, 3, 93, 27, -4 -}, -{ - 26, 39, 59, 3, -8 -}, -{ - 2, 0, 77, 11, 9 -}, -{ - -8, 22, 44, -6, 7 -}, -{ - 40, 9, 26, 3, 9 -}, -{ - -7, 20, 101, -7, 4 -}, -{ - 3, -8, 42, 26, 0 -}, -{ - -15, 33, 68, 2, 23 -}, -{ - -2, 55, 46, -2, 15 -}, -{ - 3, -1, 21, 16, 41 -} -}; - -static const opus_int8 silk_LTP_gain_vq_2[32][5] = -{ -{ - -6, 27, 61, 39, 5 -}, -{ - -11, 42, 88, 4, 1 -}, -{ - -2, 60, 65, 6, -4 -}, -{ - -1, -5, 73, 56, 1 -}, -{ - -9, 19, 94, 29, -9 -}, -{ - 0, 12, 99, 6, 4 -}, -{ - 8, -19, 102, 46, -13 -}, -{ - 3, 2, 13, 3, 2 -}, -{ - 9, -21, 84, 72, -18 -}, -{ - -11, 46, 104, -22, 8 -}, -{ - 18, 38, 48, 23, 0 -}, -{ - -16, 70, 83, -21, 11 -}, -{ - 5, -11, 117, 22, -8 -}, -{ - -6, 23, 117, -12, 3 -}, -{ - 3, -8, 95, 28, 4 -}, -{ - -10, 15, 77, 60, -15 -}, -{ - -1, 4, 124, 2, -4 -}, -{ - 3, 38, 84, 24, -25 -}, -{ - 2, 13, 42, 13, 31 -}, -{ - 21, -4, 56, 46, -1 -}, -{ - -1, 35, 79, -13, 19 -}, -{ - -7, 65, 88, -9, -14 -}, -{ - 20, 4, 81, 49, -29 -}, -{ - 20, 0, 75, 3, -17 -}, -{ - 5, -9, 44, 92, -8 -}, -{ - 1, -3, 22, 69, 31 -}, -{ - -6, 95, 41, -12, 5 -}, -{ - 39, 67, 16, -4, 1 -}, -{ - 0, -6, 120, 55, -36 -}, -{ - -13, 44, 122, 4, -24 -}, -{ - 81, 5, 11, 3, 7 -}, -{ - 2, 0, 9, 10, 88 -} -}; - -const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = { - (opus_int8 *)&silk_LTP_gain_vq_0[0][0], - (opus_int8 *)&silk_LTP_gain_vq_1[0][0], - (opus_int8 *)&silk_LTP_gain_vq_2[0][0] -}; - -/* Maximum frequency-dependent response of the pitch taps above, - computed as max(abs(freqz(taps))) */ -static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = { - 46, 2, 90, 87, 93, 91, 82, 98 -}; - -static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = { - 109, 120, 118, 12, 113, 115, 117, 119, - 99, 59, 87, 111, 63, 111, 112, 80 -}; - -static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = { - 126, 124, 125, 124, 129, 121, 126, 23, - 132, 127, 127, 127, 126, 127, 122, 133, - 130, 134, 101, 118, 119, 145, 126, 86, - 124, 120, 123, 119, 170, 173, 107, 109 -}; - -const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = { - &silk_LTP_gain_vq_0_gain[0], - &silk_LTP_gain_vq_1_gain[0], - &silk_LTP_gain_vq_2_gain[0] -}; - -const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = { - 8, 16, 32 -}; diff --git a/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c b/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c deleted file mode 100644 index 8c59d207aa..0000000000 --- a/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c +++ /dev/null @@ -1,159 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = { - 12, 35, 60, 83, 108, 132, 157, 180, - 206, 228, 15, 32, 55, 77, 101, 125, - 151, 175, 201, 225, 19, 42, 66, 89, - 114, 137, 162, 184, 209, 230, 12, 25, - 50, 72, 97, 120, 147, 172, 200, 223, - 26, 44, 69, 90, 114, 135, 159, 180, - 205, 225, 13, 22, 53, 80, 106, 130, - 156, 180, 205, 228, 15, 25, 44, 64, - 90, 115, 142, 168, 196, 222, 19, 24, - 62, 82, 100, 120, 145, 168, 190, 214, - 22, 31, 50, 79, 103, 120, 151, 170, - 203, 227, 21, 29, 45, 65, 106, 124, - 150, 171, 196, 224, 30, 49, 75, 97, - 121, 142, 165, 186, 209, 229, 19, 25, - 52, 70, 93, 116, 143, 166, 192, 219, - 26, 34, 62, 75, 97, 118, 145, 167, - 194, 217, 25, 33, 56, 70, 91, 113, - 143, 165, 196, 223, 21, 34, 51, 72, - 97, 117, 145, 171, 196, 222, 20, 29, - 50, 67, 90, 117, 144, 168, 197, 221, - 22, 31, 48, 66, 95, 117, 146, 168, - 196, 222, 24, 33, 51, 77, 116, 134, - 158, 180, 200, 224, 21, 28, 70, 87, - 106, 124, 149, 170, 194, 217, 26, 33, - 53, 64, 83, 117, 152, 173, 204, 225, - 27, 34, 65, 95, 108, 129, 155, 174, - 210, 225, 20, 26, 72, 99, 113, 131, - 154, 176, 200, 219, 34, 43, 61, 78, - 93, 114, 155, 177, 205, 229, 23, 29, - 54, 97, 124, 138, 163, 179, 209, 229, - 30, 38, 56, 89, 118, 129, 158, 178, - 200, 231, 21, 29, 49, 63, 85, 111, - 142, 163, 193, 222, 27, 48, 77, 103, - 133, 158, 179, 196, 215, 232, 29, 47, - 74, 99, 124, 151, 176, 198, 220, 237, - 33, 42, 61, 76, 93, 121, 155, 174, - 207, 225, 29, 53, 87, 112, 136, 154, - 170, 188, 208, 227, 24, 30, 52, 84, - 131, 150, 166, 186, 203, 229, 37, 48, - 64, 84, 104, 118, 156, 177, 201, 230 -}; - -static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = { - 212, 178, 148, 129, 108, 96, 85, 82, - 79, 77, 61, 59, 57, 56, 51, 49, - 48, 45, 42, 41, 40, 38, 36, 34, - 31, 30, 21, 12, 10, 3, 1, 0, - 255, 245, 244, 236, 233, 225, 217, 203, - 190, 176, 175, 161, 149, 136, 125, 114, - 102, 91, 81, 71, 60, 52, 43, 35, - 28, 20, 19, 18, 12, 11, 5, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_SELECT_NB_MB[ 160 ] = { - 16, 0, 0, 0, 0, 99, 66, 36, - 36, 34, 36, 34, 34, 34, 34, 83, - 69, 36, 52, 34, 116, 102, 70, 68, - 68, 176, 102, 68, 68, 34, 65, 85, - 68, 84, 36, 116, 141, 152, 139, 170, - 132, 187, 184, 216, 137, 132, 249, 168, - 185, 139, 104, 102, 100, 68, 68, 178, - 218, 185, 185, 170, 244, 216, 187, 187, - 170, 244, 187, 187, 219, 138, 103, 155, - 184, 185, 137, 116, 183, 155, 152, 136, - 132, 217, 184, 184, 170, 164, 217, 171, - 155, 139, 244, 169, 184, 185, 170, 164, - 216, 223, 218, 138, 214, 143, 188, 218, - 168, 244, 141, 136, 155, 170, 168, 138, - 220, 219, 139, 164, 219, 202, 216, 137, - 168, 186, 246, 185, 139, 116, 185, 219, - 185, 138, 100, 100, 134, 100, 102, 34, - 68, 68, 100, 68, 168, 203, 221, 218, - 168, 167, 154, 136, 104, 70, 164, 246, - 171, 137, 139, 137, 155, 218, 219, 139 -}; - -static const opus_uint8 silk_NLSF_CB2_iCDF_NB_MB[ 72 ] = { - 255, 254, 253, 238, 14, 3, 2, 1, - 0, 255, 254, 252, 218, 35, 3, 2, - 1, 0, 255, 254, 250, 208, 59, 4, - 2, 1, 0, 255, 254, 246, 194, 71, - 10, 2, 1, 0, 255, 252, 236, 183, - 82, 8, 2, 1, 0, 255, 252, 235, - 180, 90, 17, 2, 1, 0, 255, 248, - 224, 171, 97, 30, 4, 1, 0, 255, - 254, 236, 173, 95, 37, 7, 1, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_BITS_NB_MB_Q5[ 72 ] = { - 255, 255, 255, 131, 6, 145, 255, 255, - 255, 255, 255, 236, 93, 15, 96, 255, - 255, 255, 255, 255, 194, 83, 25, 71, - 221, 255, 255, 255, 255, 162, 73, 34, - 66, 162, 255, 255, 255, 210, 126, 73, - 43, 57, 173, 255, 255, 255, 201, 125, - 71, 48, 58, 130, 255, 255, 255, 166, - 110, 73, 57, 62, 104, 210, 255, 255, - 251, 123, 65, 55, 68, 100, 171, 255 -}; - -static const opus_uint8 silk_NLSF_PRED_NB_MB_Q8[ 18 ] = { - 179, 138, 140, 148, 151, 149, 153, 151, - 163, 116, 67, 82, 59, 92, 72, 100, - 89, 92 -}; - -static const opus_int16 silk_NLSF_DELTA_MIN_NB_MB_Q15[ 11 ] = { - 250, 3, 6, 3, 3, 3, 4, 3, - 3, 3, 461 -}; - -const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB = -{ - 32, - 10, - SILK_FIX_CONST( 0.18, 16 ), - SILK_FIX_CONST( 1.0 / 0.18, 6 ), - silk_NLSF_CB1_NB_MB_Q8, - silk_NLSF_CB1_iCDF_NB_MB, - silk_NLSF_PRED_NB_MB_Q8, - silk_NLSF_CB2_SELECT_NB_MB, - silk_NLSF_CB2_iCDF_NB_MB, - silk_NLSF_CB2_BITS_NB_MB_Q5, - silk_NLSF_DELTA_MIN_NB_MB_Q15, -}; diff --git a/thirdparty/opus/silk/tables_NLSF_CB_WB.c b/thirdparty/opus/silk/tables_NLSF_CB_WB.c deleted file mode 100644 index 50af87eb2e..0000000000 --- a/thirdparty/opus/silk/tables_NLSF_CB_WB.c +++ /dev/null @@ -1,198 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = { - 7, 23, 38, 54, 69, 85, 100, 116, - 131, 147, 162, 178, 193, 208, 223, 239, - 13, 25, 41, 55, 69, 83, 98, 112, - 127, 142, 157, 171, 187, 203, 220, 236, - 15, 21, 34, 51, 61, 78, 92, 106, - 126, 136, 152, 167, 185, 205, 225, 240, - 10, 21, 36, 50, 63, 79, 95, 110, - 126, 141, 157, 173, 189, 205, 221, 237, - 17, 20, 37, 51, 59, 78, 89, 107, - 123, 134, 150, 164, 184, 205, 224, 240, - 10, 15, 32, 51, 67, 81, 96, 112, - 129, 142, 158, 173, 189, 204, 220, 236, - 8, 21, 37, 51, 65, 79, 98, 113, - 126, 138, 155, 168, 179, 192, 209, 218, - 12, 15, 34, 55, 63, 78, 87, 108, - 118, 131, 148, 167, 185, 203, 219, 236, - 16, 19, 32, 36, 56, 79, 91, 108, - 118, 136, 154, 171, 186, 204, 220, 237, - 11, 28, 43, 58, 74, 89, 105, 120, - 135, 150, 165, 180, 196, 211, 226, 241, - 6, 16, 33, 46, 60, 75, 92, 107, - 123, 137, 156, 169, 185, 199, 214, 225, - 11, 19, 30, 44, 57, 74, 89, 105, - 121, 135, 152, 169, 186, 202, 218, 234, - 12, 19, 29, 46, 57, 71, 88, 100, - 120, 132, 148, 165, 182, 199, 216, 233, - 17, 23, 35, 46, 56, 77, 92, 106, - 123, 134, 152, 167, 185, 204, 222, 237, - 14, 17, 45, 53, 63, 75, 89, 107, - 115, 132, 151, 171, 188, 206, 221, 240, - 9, 16, 29, 40, 56, 71, 88, 103, - 119, 137, 154, 171, 189, 205, 222, 237, - 16, 19, 36, 48, 57, 76, 87, 105, - 118, 132, 150, 167, 185, 202, 218, 236, - 12, 17, 29, 54, 71, 81, 94, 104, - 126, 136, 149, 164, 182, 201, 221, 237, - 15, 28, 47, 62, 79, 97, 115, 129, - 142, 155, 168, 180, 194, 208, 223, 238, - 8, 14, 30, 45, 62, 78, 94, 111, - 127, 143, 159, 175, 192, 207, 223, 239, - 17, 30, 49, 62, 79, 92, 107, 119, - 132, 145, 160, 174, 190, 204, 220, 235, - 14, 19, 36, 45, 61, 76, 91, 108, - 121, 138, 154, 172, 189, 205, 222, 238, - 12, 18, 31, 45, 60, 76, 91, 107, - 123, 138, 154, 171, 187, 204, 221, 236, - 13, 17, 31, 43, 53, 70, 83, 103, - 114, 131, 149, 167, 185, 203, 220, 237, - 17, 22, 35, 42, 58, 78, 93, 110, - 125, 139, 155, 170, 188, 206, 224, 240, - 8, 15, 34, 50, 67, 83, 99, 115, - 131, 146, 162, 178, 193, 209, 224, 239, - 13, 16, 41, 66, 73, 86, 95, 111, - 128, 137, 150, 163, 183, 206, 225, 241, - 17, 25, 37, 52, 63, 75, 92, 102, - 119, 132, 144, 160, 175, 191, 212, 231, - 19, 31, 49, 65, 83, 100, 117, 133, - 147, 161, 174, 187, 200, 213, 227, 242, - 18, 31, 52, 68, 88, 103, 117, 126, - 138, 149, 163, 177, 192, 207, 223, 239, - 16, 29, 47, 61, 76, 90, 106, 119, - 133, 147, 161, 176, 193, 209, 224, 240, - 15, 21, 35, 50, 61, 73, 86, 97, - 110, 119, 129, 141, 175, 198, 218, 237 -}; - -static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = { - 225, 204, 201, 184, 183, 175, 158, 154, - 153, 135, 119, 115, 113, 110, 109, 99, - 98, 95, 79, 68, 52, 50, 48, 45, - 43, 32, 31, 27, 18, 10, 3, 0, - 255, 251, 235, 230, 212, 201, 196, 182, - 167, 166, 163, 151, 138, 124, 110, 104, - 90, 78, 76, 70, 69, 57, 45, 34, - 24, 21, 11, 6, 5, 4, 3, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_SELECT_WB[ 256 ] = { - 0, 0, 0, 0, 0, 0, 0, 1, - 100, 102, 102, 68, 68, 36, 34, 96, - 164, 107, 158, 185, 180, 185, 139, 102, - 64, 66, 36, 34, 34, 0, 1, 32, - 208, 139, 141, 191, 152, 185, 155, 104, - 96, 171, 104, 166, 102, 102, 102, 132, - 1, 0, 0, 0, 0, 16, 16, 0, - 80, 109, 78, 107, 185, 139, 103, 101, - 208, 212, 141, 139, 173, 153, 123, 103, - 36, 0, 0, 0, 0, 0, 0, 1, - 48, 0, 0, 0, 0, 0, 0, 32, - 68, 135, 123, 119, 119, 103, 69, 98, - 68, 103, 120, 118, 118, 102, 71, 98, - 134, 136, 157, 184, 182, 153, 139, 134, - 208, 168, 248, 75, 189, 143, 121, 107, - 32, 49, 34, 34, 34, 0, 17, 2, - 210, 235, 139, 123, 185, 137, 105, 134, - 98, 135, 104, 182, 100, 183, 171, 134, - 100, 70, 68, 70, 66, 66, 34, 131, - 64, 166, 102, 68, 36, 2, 1, 0, - 134, 166, 102, 68, 34, 34, 66, 132, - 212, 246, 158, 139, 107, 107, 87, 102, - 100, 219, 125, 122, 137, 118, 103, 132, - 114, 135, 137, 105, 171, 106, 50, 34, - 164, 214, 141, 143, 185, 151, 121, 103, - 192, 34, 0, 0, 0, 0, 0, 1, - 208, 109, 74, 187, 134, 249, 159, 137, - 102, 110, 154, 118, 87, 101, 119, 101, - 0, 2, 0, 36, 36, 66, 68, 35, - 96, 164, 102, 100, 36, 0, 2, 33, - 167, 138, 174, 102, 100, 84, 2, 2, - 100, 107, 120, 119, 36, 197, 24, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_iCDF_WB[ 72 ] = { - 255, 254, 253, 244, 12, 3, 2, 1, - 0, 255, 254, 252, 224, 38, 3, 2, - 1, 0, 255, 254, 251, 209, 57, 4, - 2, 1, 0, 255, 254, 244, 195, 69, - 4, 2, 1, 0, 255, 251, 232, 184, - 84, 7, 2, 1, 0, 255, 254, 240, - 186, 86, 14, 2, 1, 0, 255, 254, - 239, 178, 91, 30, 5, 1, 0, 255, - 248, 227, 177, 100, 19, 2, 1, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_BITS_WB_Q5[ 72 ] = { - 255, 255, 255, 156, 4, 154, 255, 255, - 255, 255, 255, 227, 102, 15, 92, 255, - 255, 255, 255, 255, 213, 83, 24, 72, - 236, 255, 255, 255, 255, 150, 76, 33, - 63, 214, 255, 255, 255, 190, 121, 77, - 43, 55, 185, 255, 255, 255, 245, 137, - 71, 43, 59, 139, 255, 255, 255, 255, - 131, 66, 50, 66, 107, 194, 255, 255, - 166, 116, 76, 55, 53, 125, 255, 255 -}; - -static const opus_uint8 silk_NLSF_PRED_WB_Q8[ 30 ] = { - 175, 148, 160, 176, 178, 173, 174, 164, - 177, 174, 196, 182, 198, 192, 182, 68, - 62, 66, 60, 72, 117, 85, 90, 118, - 136, 151, 142, 160, 142, 155 -}; - -static const opus_int16 silk_NLSF_DELTA_MIN_WB_Q15[ 17 ] = { - 100, 3, 40, 3, 3, 3, 5, 14, - 14, 10, 11, 3, 8, 9, 7, 3, - 347 -}; - -const silk_NLSF_CB_struct silk_NLSF_CB_WB = -{ - 32, - 16, - SILK_FIX_CONST( 0.15, 16 ), - SILK_FIX_CONST( 1.0 / 0.15, 6 ), - silk_NLSF_CB1_WB_Q8, - silk_NLSF_CB1_iCDF_WB, - silk_NLSF_PRED_WB_Q8, - silk_NLSF_CB2_SELECT_WB, - silk_NLSF_CB2_iCDF_WB, - silk_NLSF_CB2_BITS_WB_Q5, - silk_NLSF_DELTA_MIN_WB_Q15, -}; - diff --git a/thirdparty/opus/silk/tables_gain.c b/thirdparty/opus/silk/tables_gain.c deleted file mode 100644 index 37e41d890c..0000000000 --- a/thirdparty/opus/silk/tables_gain.c +++ /dev/null @@ -1,63 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ] = -{ -{ - 224, 112, 44, 15, 3, 2, 1, 0 -}, -{ - 254, 237, 192, 132, 70, 23, 4, 0 -}, -{ - 255, 252, 226, 155, 61, 11, 2, 0 -} -}; - -const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ] = { - 250, 245, 234, 203, 71, 50, 42, 38, - 35, 33, 31, 29, 28, 27, 26, 25, - 24, 23, 22, 21, 20, 19, 18, 17, - 16, 15, 14, 13, 12, 11, 10, 9, - 8, 7, 6, 5, 4, 3, 2, 1, - 0 -}; - -#ifdef __cplusplus -} -#endif diff --git a/thirdparty/opus/silk/tables_other.c b/thirdparty/opus/silk/tables_other.c deleted file mode 100644 index 398686bf26..0000000000 --- a/thirdparty/opus/silk/tables_other.c +++ /dev/null @@ -1,138 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "structs.h" -#include "define.h" -#include "tables.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ -const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = { - 0, 8000, 9400, 11500, 13500, 17500, 25000, MAX_TARGET_RATE_BPS -}; -const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = { - 0, 9000, 12000, 14500, 18500, 24500, 35500, MAX_TARGET_RATE_BPS -}; -const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = { - 0, 10500, 14000, 17000, 21500, 28500, 42000, MAX_TARGET_RATE_BPS -}; -const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = { - 18, 29, 38, 40, 46, 52, 62, 84 -}; - -/* Tables for stereo predictor coding */ -const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = { - -13732, -10050, -8266, -7526, -6500, -5000, -2950, -820, - 820, 2950, 5000, 6500, 7526, 8266, 10050, 13732 -}; -const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ] = { - 249, 247, 246, 245, 244, - 234, 210, 202, 201, 200, - 197, 174, 82, 59, 56, - 55, 54, 46, 22, 12, - 11, 10, 9, 7, 0 -}; -const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ] = { 64, 0 }; - -/* Tables for LBRR flags */ -static const opus_uint8 silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 }; -static const opus_uint8 silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 }; -const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ] = { - silk_LBRR_flags_2_iCDF, - silk_LBRR_flags_3_iCDF -}; - -/* Table for LSB coding */ -const opus_uint8 silk_lsb_iCDF[ 2 ] = { 120, 0 }; - -/* Tables for LTPScale */ -const opus_uint8 silk_LTPscale_iCDF[ 3 ] = { 128, 64, 0 }; - -/* Tables for signal type and offset coding */ -const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ] = { - 232, 158, 10, 0 -}; -const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ] = { - 230, 0 -}; - -/* Tables for NLSF interpolation factor */ -const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ] = { 243, 221, 192, 181, 0 }; - -/* Quantization offsets */ -const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ] = { - { OFFSET_UVL_Q10, OFFSET_UVH_Q10 }, { OFFSET_VL_Q10, OFFSET_VH_Q10 } -}; - -/* Table for LTPScale */ -const opus_int16 silk_LTPScales_table_Q14[ 3 ] = { 15565, 12288, 8192 }; - -/* Uniform entropy tables */ -const opus_uint8 silk_uniform3_iCDF[ 3 ] = { 171, 85, 0 }; -const opus_uint8 silk_uniform4_iCDF[ 4 ] = { 192, 128, 64, 0 }; -const opus_uint8 silk_uniform5_iCDF[ 5 ] = { 205, 154, 102, 51, 0 }; -const opus_uint8 silk_uniform6_iCDF[ 6 ] = { 213, 171, 128, 85, 43, 0 }; -const opus_uint8 silk_uniform8_iCDF[ 8 ] = { 224, 192, 160, 128, 96, 64, 32, 0 }; - -const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ] = { 100, 40, 16, 7, 3, 1, 0 }; - -/* Elliptic/Cauer filters designed with 0.1 dB passband ripple, - 80 dB minimum stopband attenuation, and - [0.95 : 0.15 : 0.35] normalized cut off frequencies. */ - -/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ -const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ] = -{ -{ 250767114, 501534038, 250767114 }, -{ 209867381, 419732057, 209867381 }, -{ 170987846, 341967853, 170987846 }, -{ 131531482, 263046905, 131531482 }, -{ 89306658, 178584282, 89306658 } -}; - -/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ -const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ] = -{ -{ 506393414, 239854379 }, -{ 411067935, 169683996 }, -{ 306733530, 116694253 }, -{ 185807084, 77959395 }, -{ 35497197, 57401098 } -}; - -#ifdef __cplusplus -} -#endif - diff --git a/thirdparty/opus/silk/tables_pitch_lag.c b/thirdparty/opus/silk/tables_pitch_lag.c deleted file mode 100644 index e80cc59a27..0000000000 --- a/thirdparty/opus/silk/tables_pitch_lag.c +++ /dev/null @@ -1,69 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = { - 253, 250, 244, 233, 212, 182, 150, 131, - 120, 110, 98, 85, 72, 60, 49, 40, - 32, 25, 19, 15, 13, 11, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 -}; - -const opus_uint8 silk_pitch_delta_iCDF[21] = { - 210, 208, 206, 203, 199, 193, 183, 168, - 142, 104, 74, 52, 37, 27, 20, 14, - 10, 6, 4, 2, 0 -}; - -const opus_uint8 silk_pitch_contour_iCDF[34] = { - 223, 201, 183, 167, 152, 138, 124, 111, - 98, 88, 79, 70, 62, 56, 50, 44, - 39, 35, 31, 27, 24, 21, 18, 16, - 14, 12, 10, 8, 6, 4, 3, 2, - 1, 0 -}; - -const opus_uint8 silk_pitch_contour_NB_iCDF[11] = { - 188, 176, 155, 138, 119, 97, 67, 43, - 26, 10, 0 -}; - -const opus_uint8 silk_pitch_contour_10_ms_iCDF[12] = { - 165, 119, 80, 61, 47, 35, 27, 20, - 14, 9, 4, 0 -}; - -const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[3] = { - 113, 63, 0 -}; - - diff --git a/thirdparty/opus/silk/tables_pulses_per_block.c b/thirdparty/opus/silk/tables_pulses_per_block.c deleted file mode 100644 index c7c01c8893..0000000000 --- a/thirdparty/opus/silk/tables_pulses_per_block.c +++ /dev/null @@ -1,264 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -const opus_uint8 silk_max_pulses_table[ 4 ] = { - 8, 10, 12, 16 -}; - -const opus_uint8 silk_pulses_per_block_iCDF[ 10 ][ 18 ] = { -{ - 125, 51, 26, 18, 15, 12, 11, 10, - 9, 8, 7, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 198, 105, 45, 22, 15, 12, 11, 10, - 9, 8, 7, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 213, 162, 116, 83, 59, 43, 32, 24, - 18, 15, 12, 9, 7, 6, 5, 3, - 2, 0 -}, -{ - 239, 187, 116, 59, 28, 16, 11, 10, - 9, 8, 7, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 250, 229, 188, 135, 86, 51, 30, 19, - 13, 10, 8, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 249, 235, 213, 185, 156, 128, 103, 83, - 66, 53, 42, 33, 26, 21, 17, 13, - 10, 0 -}, -{ - 254, 249, 235, 206, 164, 118, 77, 46, - 27, 16, 10, 7, 5, 4, 3, 2, - 1, 0 -}, -{ - 255, 253, 249, 239, 220, 191, 156, 119, - 85, 57, 37, 23, 15, 10, 6, 4, - 2, 0 -}, -{ - 255, 253, 251, 246, 237, 223, 203, 179, - 152, 124, 98, 75, 55, 40, 29, 21, - 15, 0 -}, -{ - 255, 254, 253, 247, 220, 162, 106, 67, - 42, 28, 18, 12, 9, 6, 4, 3, - 2, 0 -} -}; - -const opus_uint8 silk_pulses_per_block_BITS_Q5[ 9 ][ 18 ] = { -{ - 31, 57, 107, 160, 205, 205, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255 -}, -{ - 69, 47, 67, 111, 166, 205, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255 -}, -{ - 82, 74, 79, 95, 109, 128, 145, 160, - 173, 205, 205, 205, 224, 255, 255, 224, - 255, 224 -}, -{ - 125, 74, 59, 69, 97, 141, 182, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255 -}, -{ - 173, 115, 85, 73, 76, 92, 115, 145, - 173, 205, 224, 224, 255, 255, 255, 255, - 255, 255 -}, -{ - 166, 134, 113, 102, 101, 102, 107, 118, - 125, 138, 145, 155, 166, 182, 192, 192, - 205, 150 -}, -{ - 224, 182, 134, 101, 83, 79, 85, 97, - 120, 145, 173, 205, 224, 255, 255, 255, - 255, 255 -}, -{ - 255, 224, 192, 150, 120, 101, 92, 89, - 93, 102, 118, 134, 160, 182, 192, 224, - 224, 224 -}, -{ - 255, 224, 224, 182, 155, 134, 118, 109, - 104, 102, 106, 111, 118, 131, 145, 160, - 173, 131 -} -}; - -const opus_uint8 silk_rate_levels_iCDF[ 2 ][ 9 ] = -{ -{ - 241, 190, 178, 132, 87, 74, 41, 14, - 0 -}, -{ - 223, 193, 157, 140, 106, 57, 39, 18, - 0 -} -}; - -const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ 9 ] = -{ -{ - 131, 74, 141, 79, 80, 138, 95, 104, - 134 -}, -{ - 95, 99, 91, 125, 93, 76, 123, 115, - 123 -} -}; - -const opus_uint8 silk_shell_code_table0[ 152 ] = { - 128, 0, 214, 42, 0, 235, 128, 21, - 0, 244, 184, 72, 11, 0, 248, 214, - 128, 42, 7, 0, 248, 225, 170, 80, - 25, 5, 0, 251, 236, 198, 126, 54, - 18, 3, 0, 250, 238, 211, 159, 82, - 35, 15, 5, 0, 250, 231, 203, 168, - 128, 88, 53, 25, 6, 0, 252, 238, - 216, 185, 148, 108, 71, 40, 18, 4, - 0, 253, 243, 225, 199, 166, 128, 90, - 57, 31, 13, 3, 0, 254, 246, 233, - 212, 183, 147, 109, 73, 44, 23, 10, - 2, 0, 255, 250, 240, 223, 198, 166, - 128, 90, 58, 33, 16, 6, 1, 0, - 255, 251, 244, 231, 210, 181, 146, 110, - 75, 46, 25, 12, 5, 1, 0, 255, - 253, 248, 238, 221, 196, 164, 128, 92, - 60, 35, 18, 8, 3, 1, 0, 255, - 253, 249, 242, 229, 208, 180, 146, 110, - 76, 48, 27, 14, 7, 3, 1, 0 -}; - -const opus_uint8 silk_shell_code_table1[ 152 ] = { - 129, 0, 207, 50, 0, 236, 129, 20, - 0, 245, 185, 72, 10, 0, 249, 213, - 129, 42, 6, 0, 250, 226, 169, 87, - 27, 4, 0, 251, 233, 194, 130, 62, - 20, 4, 0, 250, 236, 207, 160, 99, - 47, 17, 3, 0, 255, 240, 217, 182, - 131, 81, 41, 11, 1, 0, 255, 254, - 233, 201, 159, 107, 61, 20, 2, 1, - 0, 255, 249, 233, 206, 170, 128, 86, - 50, 23, 7, 1, 0, 255, 250, 238, - 217, 186, 148, 108, 70, 39, 18, 6, - 1, 0, 255, 252, 243, 226, 200, 166, - 128, 90, 56, 30, 13, 4, 1, 0, - 255, 252, 245, 231, 209, 180, 146, 110, - 76, 47, 25, 11, 4, 1, 0, 255, - 253, 248, 237, 219, 194, 163, 128, 93, - 62, 37, 19, 8, 3, 1, 0, 255, - 254, 250, 241, 226, 205, 177, 145, 111, - 79, 51, 30, 15, 6, 2, 1, 0 -}; - -const opus_uint8 silk_shell_code_table2[ 152 ] = { - 129, 0, 203, 54, 0, 234, 129, 23, - 0, 245, 184, 73, 10, 0, 250, 215, - 129, 41, 5, 0, 252, 232, 173, 86, - 24, 3, 0, 253, 240, 200, 129, 56, - 15, 2, 0, 253, 244, 217, 164, 94, - 38, 10, 1, 0, 253, 245, 226, 189, - 132, 71, 27, 7, 1, 0, 253, 246, - 231, 203, 159, 105, 56, 23, 6, 1, - 0, 255, 248, 235, 213, 179, 133, 85, - 47, 19, 5, 1, 0, 255, 254, 243, - 221, 194, 159, 117, 70, 37, 12, 2, - 1, 0, 255, 254, 248, 234, 208, 171, - 128, 85, 48, 22, 8, 2, 1, 0, - 255, 254, 250, 240, 220, 189, 149, 107, - 67, 36, 16, 6, 2, 1, 0, 255, - 254, 251, 243, 227, 201, 166, 128, 90, - 55, 29, 13, 5, 2, 1, 0, 255, - 254, 252, 246, 234, 213, 183, 147, 109, - 73, 43, 22, 10, 4, 2, 1, 0 -}; - -const opus_uint8 silk_shell_code_table3[ 152 ] = { - 130, 0, 200, 58, 0, 231, 130, 26, - 0, 244, 184, 76, 12, 0, 249, 214, - 130, 43, 6, 0, 252, 232, 173, 87, - 24, 3, 0, 253, 241, 203, 131, 56, - 14, 2, 0, 254, 246, 221, 167, 94, - 35, 8, 1, 0, 254, 249, 232, 193, - 130, 65, 23, 5, 1, 0, 255, 251, - 239, 211, 162, 99, 45, 15, 4, 1, - 0, 255, 251, 243, 223, 186, 131, 74, - 33, 11, 3, 1, 0, 255, 252, 245, - 230, 202, 158, 105, 57, 24, 8, 2, - 1, 0, 255, 253, 247, 235, 214, 179, - 132, 84, 44, 19, 7, 2, 1, 0, - 255, 254, 250, 240, 223, 196, 159, 112, - 69, 36, 15, 6, 2, 1, 0, 255, - 254, 253, 245, 231, 209, 176, 136, 93, - 55, 27, 11, 3, 2, 1, 0, 255, - 254, 253, 252, 239, 221, 194, 158, 117, - 76, 42, 18, 4, 3, 2, 1, 0 -}; - -const opus_uint8 silk_shell_code_table_offsets[ 17 ] = { - 0, 0, 2, 5, 9, 14, 20, 27, - 35, 44, 54, 65, 77, 90, 104, 119, - 135 -}; - -const opus_uint8 silk_sign_iCDF[ 42 ] = { - 254, 49, 67, 77, 82, 93, 99, - 198, 11, 18, 24, 31, 36, 45, - 255, 46, 66, 78, 87, 94, 104, - 208, 14, 21, 32, 42, 51, 66, - 255, 94, 104, 109, 112, 115, 118, - 248, 53, 69, 80, 88, 95, 102 -}; diff --git a/thirdparty/opus/silk/tuning_parameters.h b/thirdparty/opus/silk/tuning_parameters.h deleted file mode 100644 index 5b8f404235..0000000000 --- a/thirdparty/opus/silk/tuning_parameters.h +++ /dev/null @@ -1,171 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_TUNING_PARAMETERS_H -#define SILK_TUNING_PARAMETERS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Decay time for bitreservoir */ -#define BITRESERVOIR_DECAY_TIME_MS 500 - -/*******************/ -/* Pitch estimator */ -/*******************/ - -/* Level of noise floor for whitening filter LPC analysis in pitch analysis */ -#define FIND_PITCH_WHITE_NOISE_FRACTION 1e-3f - -/* Bandwidth expansion for whitening filter in pitch analysis */ -#define FIND_PITCH_BANDWIDTH_EXPANSION 0.99f - -/*********************/ -/* Linear prediction */ -/*********************/ - -/* LPC analysis regularization */ -#define FIND_LPC_COND_FAC 1e-5f - -/* LTP analysis defines */ -#define FIND_LTP_COND_FAC 1e-5f -#define LTP_DAMPING 0.05f -#define LTP_SMOOTHING 0.1f - -/* LTP quantization settings */ -#define MU_LTP_QUANT_NB 0.03f -#define MU_LTP_QUANT_MB 0.025f -#define MU_LTP_QUANT_WB 0.02f - -/* Max cumulative LTP gain */ -#define MAX_SUM_LOG_GAIN_DB 250.0f - -/***********************/ -/* High pass filtering */ -/***********************/ - -/* Smoothing parameters for low end of pitch frequency range estimation */ -#define VARIABLE_HP_SMTH_COEF1 0.1f -#define VARIABLE_HP_SMTH_COEF2 0.015f -#define VARIABLE_HP_MAX_DELTA_FREQ 0.4f - -/* Min and max cut-off frequency values (-3 dB points) */ -#define VARIABLE_HP_MIN_CUTOFF_HZ 60 -#define VARIABLE_HP_MAX_CUTOFF_HZ 100 - -/***********/ -/* Various */ -/***********/ - -/* VAD threshold */ -#define SPEECH_ACTIVITY_DTX_THRES 0.05f - -/* Speech Activity LBRR enable threshold */ -#define LBRR_SPEECH_ACTIVITY_THRES 0.3f - -/*************************/ -/* Perceptual parameters */ -/*************************/ - -/* reduction in coding SNR during low speech activity */ -#define BG_SNR_DECR_dB 2.0f - -/* factor for reducing quantization noise during voiced speech */ -#define HARM_SNR_INCR_dB 2.0f - -/* factor for reducing quantization noise for unvoiced sparse signals */ -#define SPARSE_SNR_INCR_dB 2.0f - -/* threshold for sparseness measure above which to use lower quantization offset during unvoiced */ -#define SPARSENESS_THRESHOLD_QNT_OFFSET 0.75f - -/* warping control */ -#define WARPING_MULTIPLIER 0.015f - -/* fraction added to first autocorrelation value */ -#define SHAPE_WHITE_NOISE_FRACTION 5e-5f - -/* noise shaping filter chirp factor */ -#define BANDWIDTH_EXPANSION 0.95f - -/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */ -#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA 0.01f - -/* extra harmonic boosting (signal shaping) at low bitrates */ -#define LOW_RATE_HARMONIC_BOOST 0.1f - -/* extra harmonic boosting (signal shaping) for noisy input signals */ -#define LOW_INPUT_QUALITY_HARMONIC_BOOST 0.1f - -/* harmonic noise shaping */ -#define HARMONIC_SHAPING 0.3f - -/* extra harmonic noise shaping for high bitrates or noisy input */ -#define HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING 0.2f - -/* parameter for shaping noise towards higher frequencies */ -#define HP_NOISE_COEF 0.25f - -/* parameter for shaping noise even more towards higher frequencies during voiced speech */ -#define HARM_HP_NOISE_COEF 0.35f - -/* parameter for applying a high-pass tilt to the input signal */ -#define INPUT_TILT 0.05f - -/* parameter for extra high-pass tilt to the input signal at high rates */ -#define HIGH_RATE_INPUT_TILT 0.1f - -/* parameter for reducing noise at the very low frequencies */ -#define LOW_FREQ_SHAPING 4.0f - -/* less reduction of noise at the very low frequencies for signals with low SNR at low frequencies */ -#define LOW_QUALITY_LOW_FREQ_SHAPING_DECR 0.5f - -/* subframe smoothing coefficient for HarmBoost, HarmShapeGain, Tilt (lower -> more smoothing) */ -#define SUBFR_SMTH_COEF 0.4f - -/* parameters defining the R/D tradeoff in the residual quantizer */ -#define LAMBDA_OFFSET 1.2f -#define LAMBDA_SPEECH_ACT -0.2f -#define LAMBDA_DELAYED_DECISIONS -0.05f -#define LAMBDA_INPUT_QUALITY -0.1f -#define LAMBDA_CODING_QUALITY -0.2f -#define LAMBDA_QUANT_OFFSET 0.8f - -/* Compensation in bitrate calculations for 10 ms modes */ -#define REDUCE_BITRATE_10_MS_BPS 2200 - -/* Maximum time before allowing a bandwidth transition */ -#define MAX_BANDWIDTH_SWITCH_DELAY_MS 5000 - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_TUNING_PARAMETERS_H */ diff --git a/thirdparty/opus/silk/typedef.h b/thirdparty/opus/silk/typedef.h deleted file mode 100644 index 97b7e709be..0000000000 --- a/thirdparty/opus/silk/typedef.h +++ /dev/null @@ -1,78 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_TYPEDEF_H -#define SILK_TYPEDEF_H - -#include "opus_types.h" -#include "opus_defines.h" - -#ifndef FIXED_POINT -# include -# define silk_float float -# define silk_float_MAX FLT_MAX -#endif - -#define silk_int64_MAX ((opus_int64)0x7FFFFFFFFFFFFFFFLL) /* 2^63 - 1 */ -#define silk_int64_MIN ((opus_int64)0x8000000000000000LL) /* -2^63 */ -#define silk_int32_MAX 0x7FFFFFFF /* 2^31 - 1 = 2147483647 */ -#define silk_int32_MIN ((opus_int32)0x80000000) /* -2^31 = -2147483648 */ -#define silk_int16_MAX 0x7FFF /* 2^15 - 1 = 32767 */ -#define silk_int16_MIN ((opus_int16)0x8000) /* -2^15 = -32768 */ -#define silk_int8_MAX 0x7F /* 2^7 - 1 = 127 */ -#define silk_int8_MIN ((opus_int8)0x80) /* -2^7 = -128 */ -#define silk_uint8_MAX 0xFF /* 2^8 - 1 = 255 */ - -#define silk_TRUE 1 -#define silk_FALSE 0 - -/* assertions */ -#if (defined _WIN32 && !defined _WINCE && !defined(__GNUC__) && !defined(NO_ASSERTS)) -# ifndef silk_assert -# include /* ASSERTE() */ -# define silk_assert(COND) _ASSERTE(COND) -# endif -#else -# ifdef ENABLE_ASSERTIONS -# include -# include -#define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__); -#ifdef __GNUC__ -__attribute__((noreturn)) -#endif -static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line) -{ - fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); - abort(); -} -# define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}} -# else -# define silk_assert(COND) -# endif -#endif - -#endif /* SILK_TYPEDEF_H */ diff --git a/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c b/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c deleted file mode 100644 index 21d4a8bc1e..0000000000 --- a/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c +++ /dev/null @@ -1,857 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "main.h" -#include "celt/x86/x86cpu.h" - -#include "stack_alloc.h" - -typedef struct { - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; - opus_int32 RandState[ DECISION_DELAY ]; - opus_int32 Q_Q10[ DECISION_DELAY ]; - opus_int32 Xq_Q14[ DECISION_DELAY ]; - opus_int32 Pred_Q15[ DECISION_DELAY ]; - opus_int32 Shape_Q14[ DECISION_DELAY ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_AR_Q14; - opus_int32 Seed; - opus_int32 SeedInit; - opus_int32 RD_Q10; -} NSQ_del_dec_struct; - -typedef struct { - opus_int32 Q_Q10; - opus_int32 RD_Q10; - opus_int32 xq_Q14; - opus_int32 LF_AR_Q14; - opus_int32 sLTP_shp_Q14; - opus_int32 LPC_exc_Q14; -} NSQ_sample_struct; - -typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; - -static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -); - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ -); - -void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; - opus_int last_smple_idx, smpl_buf_idx, decisionDelay; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - opus_int32 RDmin_Q10, Gain_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - VARDECL( opus_int32, delayedGain_Q10 ); - VARDECL( NSQ_del_dec_struct, psDelDec ); - NSQ_del_dec_struct *psDD; - SAVE_STACK; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - /* Initialize delayed decision states */ - ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); - silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); - for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psDD->Seed = ( k + psIndices->Seed ) & 3; - psDD->SeedInit = psDD->Seed; - psDD->RD_Q10 = 0; - psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; - psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; - silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); - } - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - smpl_buf_idx = 0; /* index of oldest samples */ - - decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); - - /* For voiced frames limit the decision delay to lower than the pitch lag */ - if( psIndices->signalType == TYPE_VOICED ) { - for( k = 0; k < psEncC->nb_subfr; k++ ) { - decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); - } - } else { - if( lag > 0 ) { - decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); - } - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); - /* Set up pointers to start of sub frame */ - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - subfr = 0; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - if( k == 2 ) { - /* RESET DELAYED DECISIONS */ - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { - if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ i ].RD_Q10; - Winner_ind = i; - } - } - for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { - if( i != Winner_ind ) { - psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); - silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - last_smple_idx = smpl_buf_idx + decisionDelay; - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - - subfr = 0; - } - - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - NSQ->rewhite_flag = 1; - } - } - - silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, - psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); - - silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, - delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], - Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, - psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ k ].RD_Q10; - Winner_ind = k; - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - psIndices->Seed = psDD->SeedInit; - last_smple_idx = smpl_buf_idx + decisionDelay; - Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); - - /* Update states */ - NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech signal */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; - VARDECL( NSQ_sample_pair, psSampleState ); - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - - __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF; - __m128i b_Q12_0123, b_sr_Q12_0123; - SAVE_STACK; - - silk_assert( nStatesDelayedDecision > 0 ); - ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 ); - a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 ); - - if( opus_likely( predictLPCOrder == 16 ) ) { - a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 ); - a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 ); - } - - if( signalType == TYPE_VOICED ){ - b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 ); - b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - } - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q14 = 2; - { - __m128i tmpa, tmpb, pred_lag_ptr_tmp; - pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); - pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B ); - tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 ); - tmpa = _mm_srli_si128( tmpa, 2 ); - - pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */ - pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 ); - pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 ); - pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa ); - - tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */ - pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb ); - LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp ); - - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - pred_lag_ptr++; - } - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - { - __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp; - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); - - tmpb = _mm_setzero_si128(); - - /* step 1 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */ - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); /* 0, -1, -2, -3 */ - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 ); /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */ - - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */ - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - /* step 2 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) ); - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 ); - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - if ( opus_likely( predictLPCOrder == 16 ) ) - { - /* step 3 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) ); - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB ); - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - /* setp 4 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) ); - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ); - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - /* add at last */ - /* equal shift right 8 bytes*/ - tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); - } - else - { - /* add at last */ - tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/ - tmpb = _mm_add_epi32( tmpb, tmpa ); - LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); - - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); - } - - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - } - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } - RESTORE_STACK; -} - -static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -) -{ - opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - NSQ_del_dec_struct *psDD; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - - /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); - - for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); - /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); - - xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); - - xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); - } - - for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - { - __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; - - /* prepare gain_adj_Q16 in packed 4 32-bits */ - xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 ); - - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) - { - xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); - /* equal shift right 4 bytes*/ - xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); - } - - for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - - /* Scale scalar states */ - psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); - } - for( i = 0; i < DECISION_DELAY; i++ ) { - psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); - psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); - } - } - } - } -} diff --git a/thirdparty/opus/silk/x86/NSQ_sse.c b/thirdparty/opus/silk/x86/NSQ_sse.c deleted file mode 100644 index bb3c5f1955..0000000000 --- a/thirdparty/opus/silk/x86/NSQ_sse.c +++ /dev/null @@ -1,720 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "main.h" -#include "celt/x86/x86cpu.h" -#include "stack_alloc.h" - -static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -); - -static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int32 table[][4] /* I */ -); - -void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int k, lag, start_idx, LSF_interpolation_flag; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - - opus_int32 table[ 64 ][ 4 ]; - opus_int32 tmp1; - opus_int32 q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; - - SAVE_STACK; - - NSQ->rand_seed = psIndices->Seed; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - - /* 0 */ - q1_Q10 = offset_Q10; - q2_Q10 = offset_Q10 + ( 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q20 = q1_Q10 * Lambda_Q10; - rd2_Q20 = q2_Q10 * Lambda_Q10; - - table[ 32 ][ 0 ] = q1_Q10; - table[ 32 ][ 1 ] = q2_Q10; - table[ 32 ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 32 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - - /* -1 */ - q1_Q10 = offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 ); - q2_Q10 = offset_Q10; - rd1_Q20 = - q1_Q10 * Lambda_Q10; - rd2_Q20 = q2_Q10 * Lambda_Q10; - - table[ 31 ][ 0 ] = q1_Q10; - table[ 31 ][ 1 ] = q2_Q10; - table[ 31 ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 31 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - - /* > 0 */ - for (k = 1; k <= 31; k++) - { - tmp1 = offset_Q10 + silk_LSHIFT( k, 10 ); - - q1_Q10 = tmp1 - QUANT_LEVEL_ADJUST_Q10; - q2_Q10 = tmp1 - QUANT_LEVEL_ADJUST_Q10 + 1024; - rd1_Q20 = q1_Q10 * Lambda_Q10; - rd2_Q20 = q2_Q10 * Lambda_Q10; - - table[ 32 + k ][ 0 ] = q1_Q10; - table[ 32 + k ][ 1 ] = q2_Q10; - table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - } - - /* < -1 */ - for (k = -32; k <= -2; k++) - { - tmp1 = offset_Q10 + silk_LSHIFT( k, 10 ); - - q1_Q10 = tmp1 + QUANT_LEVEL_ADJUST_Q10; - q2_Q10 = tmp1 + QUANT_LEVEL_ADJUST_Q10 + 1024; - rd1_Q20 = - q1_Q10 * Lambda_Q10; - rd2_Q20 = - q2_Q10 * Lambda_Q10; - - table[ 32 + k ][ 0 ] = q1_Q10; - table[ 32 + k ][ 1 ] = q2_Q10; - table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - /* Set up pointers to start of sub frame */ - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->rewhite_flag = 1; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - } - } - - silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); - - if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) ) - { - silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], - offset_Q10, psEncC->subfr_length, &(table[32]) ); - } - else - { - silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, - offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); - } - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Update lagPrev for next frame */ - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech and noise shaping signals */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/***********************************/ -/* silk_noise_shape_quantizer_10_16 */ -/***********************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int32 table[][4] /* I */ -) -{ - opus_int i; - opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; - opus_int32 n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10; - opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; - - __m128i xmm_tempa, xmm_tempb; - - __m128i xmm_one; - - __m128i psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF; - __m128i psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF; - __m128i a_Q12_01234567, a_Q12_89ABCDEF; - - __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210; - __m128i AR_shp_Q13_76543210; - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - /* Set up short term AR state */ - psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; - - sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14; - xq_Q14 = psLPC_Q14[ 0 ]; - LTP_pred_Q13 = 0; - - /* load a_Q12 */ - xmm_one = _mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 ); - - /* load a_Q12[0] - a_Q12[7] */ - a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) ); - /* load a_Q12[ 8 ] - a_Q12[ 15 ] */ - a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) ); - - a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one ); - a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one ); - - /* load AR_shp_Q13 */ - AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) ); - - /* load psLPC_Q14 */ - xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 ); - - xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) ); - xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) ); - - xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); - xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); - - psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); - psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); - - xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) ); - xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) ); - - xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); - xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); - - psLPC_Q14_hi_01234567 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); - psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); - - /* load sAR2_Q14 */ - xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) ); - xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) ); - - xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); - xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); - - sAR2_Q14_hi_76543210 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); - sAR2_Q14_lo_76543210 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); - - /* prepare 1 in 8 * 16bit */ - xmm_one = _mm_set1_epi16(1); - - for( i = 0; i < length; i++ ) - { - /* Short-term prediction */ - __m128i xmm_hi_07, xmm_hi_8F, xmm_lo_07, xmm_lo_8F; - - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = 8; /* silk_RSHIFT( predictLPCOrder, 1 ); */ - - /* shift psLPC_Q14 */ - psLPC_Q14_hi_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF, 2 ); - psLPC_Q14_lo_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF, 2 ); - - psLPC_Q14_hi_01234567 = _mm_srli_si128( psLPC_Q14_hi_01234567, 2 ); - psLPC_Q14_lo_01234567 = _mm_srli_si128( psLPC_Q14_lo_01234567, 2 ); - - psLPC_Q14_hi_01234567 = _mm_insert_epi16( psLPC_Q14_hi_01234567, (xq_Q14 >> 16), 7 ); - psLPC_Q14_lo_01234567 = _mm_insert_epi16( psLPC_Q14_lo_01234567, (xq_Q14), 7 ); - - /* high part, use pmaddwd, results in 4 32-bit */ - xmm_hi_07 = _mm_madd_epi16( psLPC_Q14_hi_01234567, a_Q12_01234567 ); - xmm_hi_8F = _mm_madd_epi16( psLPC_Q14_hi_89ABCDEF, a_Q12_89ABCDEF ); - - /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed, _mm_srai_epi16(psLPC_Q14_lo_01234567, 15) */ - xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_01234567 ); - xmm_tempb = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_89ABCDEF ); - - xmm_tempa = _mm_and_si128( xmm_tempa, a_Q12_01234567 ); - xmm_tempb = _mm_and_si128( xmm_tempb, a_Q12_89ABCDEF ); - - xmm_lo_07 = _mm_mulhi_epi16( psLPC_Q14_lo_01234567, a_Q12_01234567 ); - xmm_lo_8F = _mm_mulhi_epi16( psLPC_Q14_lo_89ABCDEF, a_Q12_89ABCDEF ); - - xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa ); - xmm_lo_8F = _mm_add_epi16( xmm_lo_8F, xmm_tempb ); - - xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one ); - xmm_lo_8F = _mm_madd_epi16( xmm_lo_8F, xmm_one ); - - /* accumulate */ - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_hi_8F ); - xmm_lo_07 = _mm_add_epi32( xmm_lo_07, xmm_lo_8F ); - - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 ); - - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) ); - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) ); - - LPC_pred_Q10 += _mm_cvtsi128_si32( xmm_hi_07 ); - - /* Long-term prediction */ - if ( opus_likely( signalType == TYPE_VOICED ) ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q13 = 2; - { - __m128i b_Q14_3210, b_Q14_0123, pred_lag_ptr_0123; - - b_Q14_3210 = OP_CVTEPI16_EPI32_M64( b_Q14 ); - b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B ); - - /* loaded: [0] [-1] [-2] [-3] */ - pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); - /* shuffle to [-3] [-2] [-1] [0] and to new xmm */ - xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B ); - /*64-bit multiply, a[2] * b[-2], a[0] * b[0] */ - xmm_tempa = _mm_mul_epi32( xmm_tempa, b_Q14_3210 ); - /* right shift 2 bytes (16 bits), zero extended */ - xmm_tempa = _mm_srli_si128( xmm_tempa, 2 ); - - /* a[1] * b[-1], a[3] * b[-3] */ - pred_lag_ptr_0123 = _mm_mul_epi32( pred_lag_ptr_0123, b_Q14_0123 ); - pred_lag_ptr_0123 = _mm_srli_si128( pred_lag_ptr_0123, 2 ); - - pred_lag_ptr_0123 = _mm_add_epi32( pred_lag_ptr_0123, xmm_tempa ); - /* equal shift right 8 bytes*/ - xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - xmm_tempa = _mm_add_epi32( xmm_tempa, pred_lag_ptr_0123 ); - - LTP_pred_Q13 += _mm_cvtsi128_si32( xmm_tempa ); - - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - pred_lag_ptr++; - } - } - - /* Noise shape feedback */ - NSQ->sAR2_Q14[ 9 ] = NSQ->sAR2_Q14[ 8 ]; - NSQ->sAR2_Q14[ 8 ] = _mm_cvtsi128_si32( _mm_srli_si128(_mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ), 12 ) ); - - sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 ); - sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 ); - - sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 ); - sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14), 0 ); - - /* high part, use pmaddwd, results in 4 32-bit */ - xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, AR_shp_Q13_76543210 ); - - /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed,_mm_srai_epi16(sAR2_Q14_lo_76543210, 15) */ - xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), sAR2_Q14_lo_76543210 ); - xmm_tempa = _mm_and_si128( xmm_tempa, AR_shp_Q13_76543210 ); - - xmm_lo_07 = _mm_mulhi_epi16( sAR2_Q14_lo_76543210, AR_shp_Q13_76543210 ); - xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa ); - - xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one ); - - /* accumulate */ - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 ); - - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) ); - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) ); - - n_AR_Q12 = 5 + _mm_cvtsi128_si32( xmm_hi_07 ); - - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 8 ], AR_shp_Q13[ 8 ] ); - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 9 ], AR_shp_Q13[ 9 ] ); - - n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */ - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, sLF_AR_shp_Q14, Tilt_Q14 ); - - n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); - n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 ); - - silk_assert( lag > 0 || signalType != TYPE_VOICED ); - - /* Combine prediction and noise shaping signals */ - tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ - tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); - shp_lag_ptr++; - - tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */ - tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */ - } else { - tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */ - } - - r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Generate dither */ - NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); - - /* Flip sign depending on dither */ - tmp2 = -r_Q10; - if ( NSQ->rand_seed < 0 ) r_Q10 = tmp2; - - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - - q1_Q10 = table[q1_Q0][0]; - q2_Q10 = table[q1_Q0][1]; - - if (r_Q10 * table[q1_Q0][2] - table[q1_Q0][3] < 0) - { - q1_Q10 = q2_Q10; - } - - pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 ); - - /* Excitation */ - exc_Q14 = silk_LSHIFT( q1_Q10, 4 ); - - tmp2 = -exc_Q14; - if ( NSQ->rand_seed < 0 ) exc_Q14 = tmp2; - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 ); - xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 ); - - /* Update states */ - psLPC_Q14++; - *psLPC_Q14 = xq_Q14; - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); - - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); - sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Make dither dependent on quantized signal */ - NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); - } - - NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; - - /* Scale XQ back to normal level before saving */ - psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH ]; - - /* write back sAR2_Q14 */ - xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ); - xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ); - _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa ); - _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb ); - - /* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */ - { - __m128i xmm_Gain_Q10; - __m128i xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, xmm_xq_Q14_7654, xmm_xq_Q14_x7x5; - - /* prepare (1 << 7) in packed 4 32-bits */ - xmm_tempa = _mm_set1_epi32( (1 << 7) ); - - /* prepare Gain_Q10 in packed 4 32-bits */ - xmm_Gain_Q10 = _mm_set1_epi32( Gain_Q10 ); - - /* process xq */ - for (i = 0; i < length - 7; i += 8) - { - xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) ); - xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) ); - - /* equal shift right 4 bytes*/ - xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - /* equal shift right 4 bytes*/ - xmm_xq_Q14_x7x5 = _mm_shuffle_epi32( xmm_xq_Q14_7654, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_xq_Q14_3210 = _mm_mul_epi32( xmm_xq_Q14_3210, xmm_Gain_Q10 ); - xmm_xq_Q14_x3x1 = _mm_mul_epi32( xmm_xq_Q14_x3x1, xmm_Gain_Q10 ); - xmm_xq_Q14_7654 = _mm_mul_epi32( xmm_xq_Q14_7654, xmm_Gain_Q10 ); - xmm_xq_Q14_x7x5 = _mm_mul_epi32( xmm_xq_Q14_x7x5, xmm_Gain_Q10 ); - - xmm_xq_Q14_3210 = _mm_srli_epi64( xmm_xq_Q14_3210, 16 ); - xmm_xq_Q14_x3x1 = _mm_slli_epi64( xmm_xq_Q14_x3x1, 16 ); - xmm_xq_Q14_7654 = _mm_srli_epi64( xmm_xq_Q14_7654, 16 ); - xmm_xq_Q14_x7x5 = _mm_slli_epi64( xmm_xq_Q14_x7x5, 16 ); - - xmm_xq_Q14_3210 = _mm_blend_epi16( xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, 0xCC ); - xmm_xq_Q14_7654 = _mm_blend_epi16( xmm_xq_Q14_7654, xmm_xq_Q14_x7x5, 0xCC ); - - /* silk_RSHIFT_ROUND(xq, 8) */ - xmm_xq_Q14_3210 = _mm_add_epi32( xmm_xq_Q14_3210, xmm_tempa ); - xmm_xq_Q14_7654 = _mm_add_epi32( xmm_xq_Q14_7654, xmm_tempa ); - - xmm_xq_Q14_3210 = _mm_srai_epi32( xmm_xq_Q14_3210, 8 ); - xmm_xq_Q14_7654 = _mm_srai_epi32( xmm_xq_Q14_7654, 8 ); - - /* silk_SAT16 */ - xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 ); - - /* save to xq */ - _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 ); - } - } - for ( ; i < length; i++) - { - xq[i] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); - } - - /* Update LPC synth buffer */ - silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); -} - -static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -) -{ - opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - - /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); - - for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); - - /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); - - xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); - - xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 ); - } - - for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; - - /* prepare gain_adj_Q16 in packed 4 32-bits */ - xmm_gain_adj_Q16 = _mm_set1_epi32(gain_adj_Q16); - - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) - { - xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); - /* equal shift right 4 bytes*/ - xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); - } - - for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); - } - } -} diff --git a/thirdparty/opus/silk/x86/SigProc_FIX_sse.h b/thirdparty/opus/silk/x86/SigProc_FIX_sse.h deleted file mode 100644 index 61efa8da41..0000000000 --- a/thirdparty/opus/silk/x86/SigProc_FIX_sse.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef SIGPROC_FIX_SSE_H -#define SIGPROC_FIX_SSE_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -void silk_burg_modified_sse4_1( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) - -#else - -extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */); - -# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) - -#endif - -opus_int64 silk_inner_prod16_aligned_64_sse4_1( - const opus_int16 *inVec1, - const opus_int16 *inVec2, - const opus_int len -); - - -#if defined(OPUS_X86_PRESUME_SSE4_1) - -#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_sse4_1(inVec1, inVec2, len)) - -#else - -extern opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[OPUS_ARCHMASK + 1])( - const opus_int16 *inVec1, - const opus_int16 *inVec2, - const opus_int len); - -# define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((*SILK_INNER_PROD16_ALIGNED_64_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) - -#endif -#endif -#endif diff --git a/thirdparty/opus/silk/x86/VAD_sse.c b/thirdparty/opus/silk/x86/VAD_sse.c deleted file mode 100644 index 4e90f4410d..0000000000 --- a/thirdparty/opus/silk/x86/VAD_sse.c +++ /dev/null @@ -1,277 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include - -#include "main.h" -#include "stack_alloc.h" - -/* Weighting factors for tilt measure */ -static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 }; - -/***************************************/ -/* Get the speech activity level in Q8 */ -/***************************************/ -opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if success */ - silk_encoder_state *psEncC, /* I/O Encoder state */ - const opus_int16 pIn[] /* I PCM input */ -) -{ - opus_int SA_Q15, pSNR_dB_Q7, input_tilt; - opus_int decimated_framelength1, decimated_framelength2; - opus_int decimated_framelength; - opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; - opus_int32 sumSquared, smooth_coef_Q16; - opus_int16 HPstateTmp; - VARDECL( opus_int16, X ); - opus_int32 Xnrg[ VAD_N_BANDS ]; - opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; - opus_int32 speech_nrg, x_tmp; - opus_int X_offset[ VAD_N_BANDS ]; - opus_int ret = 0; - silk_VAD_state *psSilk_VAD = &psEncC->sVAD; - - SAVE_STACK; - - /* Safety checks */ - silk_assert( VAD_N_BANDS == 4 ); - silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - silk_assert( psEncC->frame_length <= 512 ); - silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); - - /***********************/ - /* Filter and Decimate */ - /***********************/ - decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); - decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); - decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); - /* Decimate into 4 bands: - 0 L 3L L 3L 5L - - -- - -- -- - 8 8 2 4 4 - - [0-1 kHz| temp. |1-2 kHz| 2-4 kHz | 4-8 kHz | - - They're arranged to allow the minimal ( frame_length / 4 ) extra - scratch space during the downsampling process */ - X_offset[ 0 ] = 0; - X_offset[ 1 ] = decimated_framelength + decimated_framelength2; - X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; - X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; - ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); - - /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ - silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], - X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); - - /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], - X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); - - /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], - X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); - - /*********************************************/ - /* HP filter on lowest band (differentiator) */ - /*********************************************/ - X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); - HPstateTmp = X[ decimated_framelength - 1 ]; - for( i = decimated_framelength - 1; i > 0; i-- ) { - X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); - X[ i ] -= X[ i - 1 ]; - } - X[ 0 ] -= psSilk_VAD->HPstate; - psSilk_VAD->HPstate = HPstateTmp; - - /*************************************/ - /* Calculate the energy in each band */ - /*************************************/ - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Find the decimated framelength in the non-uniformly divided bands */ - decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); - - /* Split length into subframe lengths */ - dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); - dec_subframe_offset = 0; - - /* Compute energy per sub-frame */ - /* initialize with summed energy of last subframe */ - Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; - for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { - __m128i xmm_X, xmm_acc; - sumSquared = 0; - - xmm_acc = _mm_setzero_si128(); - - for( i = 0; i < dec_subframe_length - 7; i += 8 ) - { - xmm_X = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) ); - xmm_X = _mm_srai_epi16( xmm_X, 3 ); - xmm_X = _mm_madd_epi16( xmm_X, xmm_X ); - xmm_acc = _mm_add_epi32( xmm_acc, xmm_X ); - } - - xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) ); - xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) ); - - sumSquared += _mm_cvtsi128_si32( xmm_acc ); - - for( ; i < dec_subframe_length; i++ ) { - /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ - /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ - x_tmp = silk_RSHIFT( - X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); - sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); - - /* Safety check */ - silk_assert( sumSquared >= 0 ); - } - - /* Add/saturate summed energy of current subframe */ - if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); - } else { - /* Look-ahead subframe */ - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); - } - - dec_subframe_offset += dec_subframe_length; - } - psSilk_VAD->XnrgSubfr[ b ] = sumSquared; - } - - /********************/ - /* Noise estimation */ - /********************/ - silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); - - /***********************************************/ - /* Signal-plus-noise to noise ratio estimation */ - /***********************************************/ - sumSquared = 0; - input_tilt = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; - if( speech_nrg > 0 ) { - /* Divide, with sufficient resolution */ - if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); - } else { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); - } - - /* Convert to log domain */ - SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; - - /* Sum-of-squares */ - sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ - - /* Tilt measure */ - if( speech_nrg < ( (opus_int32)1 << 20 ) ) { - /* Scale down SNR value for small subband speech energies */ - SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); - } - input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); - } else { - NrgToNoiseRatio_Q8[ b ] = 256; - } - } - - /* Mean-of-squares */ - sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ - - /* Root-mean-square approximation, scale to dBs, and write to output pointer */ - pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ - - /*********************************/ - /* Speech Probability Estimation */ - /*********************************/ - SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); - - /**************************/ - /* Frequency Tilt Measure */ - /**************************/ - psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); - - /**************************************************/ - /* Scale the sigmoid output based on power levels */ - /**************************************************/ - speech_nrg = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ - speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); - } - - /* Power scaling */ - if( speech_nrg <= 0 ) { - SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 32768 ) { - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); - } else { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); - } - - /* square-root */ - speech_nrg = silk_SQRT_APPROX( speech_nrg ); - SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); - } - - /* Copy the resulting speech activity in Q8 */ - psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); - - /***********************************/ - /* Energy Level and SNR estimation */ - /***********************************/ - /* Smoothing coefficient */ - smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); - - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - smooth_coef_Q16 >>= 1; - } - - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* compute smoothed energy-to-noise ratio per band */ - psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ], - NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 ); - - /* signal to noise ratio in dB per band */ - SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 ); - /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */ - psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); - } - - RESTORE_STACK; - return( ret ); -} diff --git a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c b/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c deleted file mode 100644 index 74d6c6d0ec..0000000000 --- a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c +++ /dev/null @@ -1,142 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "main.h" -#include "celt/x86/x86cpu.h" - -/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC_sse4_1( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -) -{ - opus_int k, gain_tmp_Q7; - const opus_int8 *cb_row_Q7; - opus_int16 diff_Q14[ 5 ]; - opus_int32 sum1_Q14, sum2_Q16; - - __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5; - /* Loop over codebook */ - *rate_dist_Q14 = silk_int32_MAX; - cb_row_Q7 = cb_Q7; - for( k = 0; k < L; k++ ) { - gain_tmp_Q7 = cb_gain_Q7[k]; - - diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); - - C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] ); - C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); - C_tmp2 = _mm_slli_epi32( C_tmp2, 7 ); - C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 ); - - diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 ); - diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 ); - diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 ); - diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 ); - - /* Weighted rate */ - sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); - - /* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); - - silk_assert( sum1_Q14 >= 0 ); - - /* first row of W_Q18 */ - C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) ); - C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp4 = _mm_srli_si128( C_tmp4, 2 ); - - C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - - C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp5 = _mm_srli_si128( C_tmp5, 2 ); - - C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 ); - C_tmp5 = _mm_slli_epi32( C_tmp5, 1 ); - - C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 ); - - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); - - /* second row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); - - /* third row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); - - /* fourth row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); - - /* last row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); - - silk_assert( sum1_Q14 >= 0 ); - - /* find best */ - if( sum1_Q14 < *rate_dist_Q14 ) { - *rate_dist_Q14 = sum1_Q14; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; - } - - /* Go to next cbk vector */ - cb_row_Q7 += LTP_ORDER; - } -} diff --git a/thirdparty/opus/silk/x86/main_sse.h b/thirdparty/opus/silk/x86/main_sse.h deleted file mode 100644 index d8d61310ed..0000000000 --- a/thirdparty/opus/silk/x86/main_sse.h +++ /dev/null @@ -1,277 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef MAIN_SSE_H -#define MAIN_SSE_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) - -# define OVERRIDE_silk_VQ_WMat_EC - -void silk_VQ_WMat_EC_sse4_1( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -); - -#if defined OPUS_X86_PRESUME_SSE4_1 - -#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) - -#else - -extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -); - -# define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) - -#endif - -# define OVERRIDE_silk_NSQ - -void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if defined OPUS_X86_PRESUME_SSE4_1 - -#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#else - -extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#endif - -# define OVERRIDE_silk_NSQ_del_dec - -void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if defined OPUS_X86_PRESUME_SSE4_1 - -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#else - -extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#endif - -void silk_noise_shape_quantizer( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - int arch /* I Architecture */ -); - -/**************************/ -/* Noise level estimation */ -/**************************/ -void silk_VAD_GetNoiseLevels( - const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -); - -# define OVERRIDE_silk_VAD_GetSA_Q8 - -opus_int silk_VAD_GetSA_Q8_sse4_1( - silk_encoder_state *psEnC, - const opus_int16 pIn[] -); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn)) - -#else - -# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ - ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) - -extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( - silk_encoder_state *psEnC, - const opus_int16 pIn[]); - -# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX - -#endif - -void silk_warped_LPC_analysis_filter_FIX_sse4_1( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ - ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) - -#else - -extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -); - -# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ - ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) - -#endif - -# endif -#endif diff --git a/thirdparty/opus/silk/x86/x86_silk_map.c b/thirdparty/opus/silk/x86/x86_silk_map.c deleted file mode 100644 index 818841f2c1..0000000000 --- a/thirdparty/opus/silk/x86/x86_silk_map.c +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if defined(HAVE_CONFIG_H) -#include "config.h" -#endif - -#include "celt/x86/x86cpu.h" -#include "structs.h" -#include "SigProc_FIX.h" -#include "pitch.h" -#include "main.h" - -#if !defined(OPUS_X86_PRESUME_SSE4_1) - -#if defined(FIXED_POINT) - -#include "fixed/main_FIX.h" - -opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )( - const opus_int16 *inVec1, - const opus_int16 *inVec2, - const opus_int len -) = { - silk_inner_prod16_aligned_64_c, /* non-sse */ - silk_inner_prod16_aligned_64_c, - silk_inner_prod16_aligned_64_c, - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ -}; - -#endif - -opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( - silk_encoder_state *psEncC, - const opus_int16 pIn[] -) = { - silk_VAD_GetSA_Q8_c, /* non-sse */ - silk_VAD_GetSA_Q8_c, - silk_VAD_GetSA_Q8_c, - MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ -}; - -void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) = { - silk_NSQ_c, /* non-sse */ - silk_NSQ_c, - silk_NSQ_c, - MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ -}; - -void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -) = { - silk_VQ_WMat_EC_c, /* non-sse */ - silk_VQ_WMat_EC_c, - silk_VQ_WMat_EC_c, - MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ -}; - -void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) = { - silk_NSQ_del_dec_c, /* non-sse */ - silk_NSQ_del_dec_c, - silk_NSQ_del_dec_c, - MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ -}; - -#if defined(FIXED_POINT) - -void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) = { - silk_warped_LPC_analysis_filter_FIX_c, /* non-sse */ - silk_warped_LPC_analysis_filter_FIX_c, - silk_warped_LPC_analysis_filter_FIX_c, - MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ -}; - -void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -) = { - silk_burg_modified_c, /* non-sse */ - silk_burg_modified_c, - silk_burg_modified_c, - MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */ -}; - -#endif -#endif diff --git a/thirdparty/opus/stream.c b/thirdparty/opus/stream.c deleted file mode 100644 index 0238a6b31b..0000000000 --- a/thirdparty/opus/stream.c +++ /dev/null @@ -1,366 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $ - - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" -#include -#include -#include -#include -#include -#if defined(_WIN32) -# include -#endif - -typedef struct OpusMemStream OpusMemStream; - -#define OP_MEM_SIZE_MAX (~(size_t)0>>1) -#define OP_MEM_DIFF_MAX ((ptrdiff_t)OP_MEM_SIZE_MAX) - -/*The context information needed to read from a block of memory as if it were a - file.*/ -struct OpusMemStream{ - /*The block of memory to read from.*/ - const unsigned char *data; - /*The total size of the block. - This must be at most OP_MEM_SIZE_MAX to prevent signed overflow while - seeking.*/ - ptrdiff_t size; - /*The current file position. - This is allowed to be set arbitrarily greater than size (i.e., past the end - of the block, though we will not read data past the end of the block), but - is not allowed to be negative (i.e., before the beginning of the block).*/ - ptrdiff_t pos; -}; - -static int op_fread(void *_stream,unsigned char *_ptr,int _buf_size){ - FILE *stream; - size_t ret; - /*Check for empty read.*/ - if(_buf_size<=0)return 0; - stream=(FILE *)_stream; - ret=fread(_ptr,1,_buf_size,stream); - OP_ASSERT(ret<=(size_t)_buf_size); - /*If ret==0 and !feof(stream), there was a read error.*/ - return ret>0||feof(stream)?(int)ret:OP_EREAD; -} - -static int op_fseek(void *_stream,opus_int64 _offset,int _whence){ -#if defined(_WIN32) - /*_fseeki64() is not exposed until MSCVCRT80. - This is the default starting with MSVC 2005 (_MSC_VER>=1400), but we want - to allow linking against older MSVCRT versions for compatibility back to - XP without installing extra runtime libraries. - i686-pc-mingw32 does not have fseeko() and requires - __MSVCRT_VERSION__>=0x800 for _fseeki64(), which screws up linking with - other libraries (that don't use MSVCRT80 from MSVC 2005 by default). - i686-w64-mingw32 does have fseeko() and respects _FILE_OFFSET_BITS, but I - don't know how to detect that at compile time. - We could just use fseeko64() (which is available in both), but its - implemented using fgetpos()/fsetpos() just like this code, except without - the overflow checking, so we prefer our version.*/ - opus_int64 pos; - /*We don't use fpos_t directly because it might be a struct if __STDC__ is - non-zero or _INTEGRAL_MAX_BITS < 64. - I'm not certain when the latter is true, but someone could in theory set - the former. - Either way, it should be binary compatible with a normal 64-bit int (this - assumption is not portable, but I believe it is true for MSVCRT).*/ - OP_ASSERT(sizeof(pos)==sizeof(fpos_t)); - /*Translate the seek to an absolute one.*/ - if(_whence==SEEK_CUR){ - int ret; - ret=fgetpos((FILE *)_stream,(fpos_t *)&pos); - if(ret)return ret; - } - else if(_whence==SEEK_END)pos=_filelengthi64(_fileno((FILE *)_stream)); - else if(_whence==SEEK_SET)pos=0; - else return -1; - /*Check for errors or overflow.*/ - if(pos<0||_offset<-pos||_offset>OP_INT64_MAX-pos)return -1; - pos+=_offset; - return fsetpos((FILE *)_stream,(fpos_t *)&pos); -#else - /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer - it except on Windows.*/ - return fseeko((FILE *)_stream,(off_t)_offset,_whence); -#endif -} - -static opus_int64 op_ftell(void *_stream){ -#if defined(_WIN32) - /*_ftelli64() is not exposed until MSCVCRT80, and ftello()/ftello64() have - the same problems as fseeko()/fseeko64() in MingW. - See above for a more detailed explanation.*/ - opus_int64 pos; - OP_ASSERT(sizeof(pos)==sizeof(fpos_t)); - return fgetpos((FILE *)_stream,(fpos_t *)&pos)?-1:pos; -#else - /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer - it except on Windows.*/ - return ftello((FILE *)_stream); -#endif -} - -static const OpusFileCallbacks OP_FILE_CALLBACKS={ - op_fread, - op_fseek, - op_ftell, - (op_close_func)fclose -}; - -#if defined(_WIN32) -# include -# include - -/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API, - so if we just pass the path to fopen(), then there'd be no way for a user - of our API to open a Unicode filename. - Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API. - This makes this API more consistent with platforms where the character set - used by fopen is the same as used on disk, which is generally UTF-8, and - with our metadata API, which always uses UTF-8.*/ -static wchar_t *op_utf8_to_utf16(const char *_src){ - wchar_t *dst; - size_t len; - len=strlen(_src); - /*Worst-case output is 1 wide character per 1 input character.*/ - dst=(wchar_t *)_ogg_malloc(sizeof(*dst)*(len+1)); - if(dst!=NULL){ - size_t si; - size_t di; - for(di=si=0;si=0x80U){ - /*This is a 2-byte sequence that is not overlong.*/ - dst[di++]=w; - si++; - continue; - } - } - else{ - int c2; - /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/ - c2=(unsigned char)_src[si+2]; - if((c2&0xC0)==0x80){ - /*Found at least two continuation bytes.*/ - if((c0&0xF0)==0xE0){ - wchar_t w; - /*Start byte says this is a 3-byte sequence.*/ - w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F; - if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){ - /*This is a 3-byte sequence that is not overlong, not a - UTF-16 surrogate pair value, and not a 'not a character' - value.*/ - dst[di++]=w; - si+=2; - continue; - } - } - else{ - int c3; - /*This is safe, because c2 was not 0 and _src is - NUL-terminated.*/ - c3=(unsigned char)_src[si+3]; - if((c3&0xC0)==0x80){ - /*Found at least three continuation bytes.*/ - if((c0&0xF8)==0xF0){ - opus_uint32 w; - /*Start byte says this is a 4-byte sequence.*/ - w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6&(c3&0x3F); - if(w>=0x10000U&&w<0x110000U){ - /*This is a 4-byte sequence that is not overlong and not - greater than the largest valid Unicode code point. - Convert it to a surrogate pair.*/ - w-=0x10000; - dst[di++]=(wchar_t)(0xD800+(w>>10)); - dst[di++]=(wchar_t)(0xDC00+(w&0x3FF)); - si+=3; - continue; - } - } - } - } - } - } - } - } - /*If we got here, we encountered an illegal UTF-8 sequence.*/ - _ogg_free(dst); - return NULL; - } - OP_ASSERT(di<=len); - dst[di]='\0'; - } - return dst; -} - -#endif - -void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){ - FILE *fp; -#if !defined(_WIN32) - fp=fopen(_path,_mode); -#else - fp=NULL; - if(_path==NULL||_mode==NULL)errno=EINVAL; - else{ - wchar_t *wpath; - wchar_t *wmode; - wpath=op_utf8_to_utf16(_path); - wmode=op_utf8_to_utf16(_mode); - if(wmode==NULL)errno=EINVAL; - else if(wpath==NULL)errno=ENOENT; - else fp=_wfopen(wpath,wmode); - _ogg_free(wmode); - _ogg_free(wpath); - } -#endif - if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; - return fp; -} - -void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){ - FILE *fp; - fp=fdopen(_fd,_mode); - if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; - return fp; -} - -void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode, - void *_stream){ - FILE *fp; -#if !defined(_WIN32) - fp=freopen(_path,_mode,(FILE *)_stream); -#else - fp=NULL; - if(_path==NULL||_mode==NULL)errno=EINVAL; - else{ - wchar_t *wpath; - wchar_t *wmode; - wpath=op_utf8_to_utf16(_path); - wmode=op_utf8_to_utf16(_mode); - if(wmode==NULL)errno=EINVAL; - else if(wpath==NULL)errno=ENOENT; - else fp=_wfreopen(wpath,wmode,(FILE *)_stream); - _ogg_free(wmode); - _ogg_free(wpath); - } -#endif - if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; - return fp; -} - -static int op_mem_read(void *_stream,unsigned char *_ptr,int _buf_size){ - OpusMemStream *stream; - ptrdiff_t size; - ptrdiff_t pos; - stream=(OpusMemStream *)_stream; - /*Check for empty read.*/ - if(_buf_size<=0)return 0; - size=stream->size; - pos=stream->pos; - /*Check for EOF.*/ - if(pos>=size)return 0; - /*Check for a short read.*/ - _buf_size=(int)OP_MIN(size-pos,_buf_size); - memcpy(_ptr,stream->data+pos,_buf_size); - pos+=_buf_size; - stream->pos=pos; - return _buf_size; -} - -static int op_mem_seek(void *_stream,opus_int64 _offset,int _whence){ - OpusMemStream *stream; - ptrdiff_t pos; - stream=(OpusMemStream *)_stream; - pos=stream->pos; - OP_ASSERT(pos>=0); - switch(_whence){ - case SEEK_SET:{ - /*Check for overflow:*/ - if(_offset<0||_offset>OP_MEM_DIFF_MAX)return -1; - pos=(ptrdiff_t)_offset; - }break; - case SEEK_CUR:{ - /*Check for overflow:*/ - if(_offset<-pos||_offset>OP_MEM_DIFF_MAX-pos)return -1; - pos=(ptrdiff_t)(pos+_offset); - }break; - case SEEK_END:{ - ptrdiff_t size; - size=stream->size; - OP_ASSERT(size>=0); - /*Check for overflow:*/ - if(_offset>size||_offsetpos=pos; - return 0; -} - -static opus_int64 op_mem_tell(void *_stream){ - OpusMemStream *stream; - stream=(OpusMemStream *)_stream; - return (ogg_int64_t)stream->pos; -} - -static int op_mem_close(void *_stream){ - _ogg_free(_stream); - return 0; -} - -static const OpusFileCallbacks OP_MEM_CALLBACKS={ - op_mem_read, - op_mem_seek, - op_mem_tell, - op_mem_close -}; - -void *op_mem_stream_create(OpusFileCallbacks *_cb, - const unsigned char *_data,size_t _size){ - OpusMemStream *stream; - if(_size>OP_MEM_SIZE_MAX)return NULL; - stream=(OpusMemStream *)_ogg_malloc(sizeof(*stream)); - if(stream!=NULL){ - *_cb=*&OP_MEM_CALLBACKS; - stream->data=_data; - stream->size=_size; - stream->pos=0; - } - return stream; -} diff --git a/thirdparty/opus/tansig_table.h b/thirdparty/opus/tansig_table.h deleted file mode 100644 index c76f844a72..0000000000 --- a/thirdparty/opus/tansig_table.h +++ /dev/null @@ -1,45 +0,0 @@ -/* This file is auto-generated by gen_tables */ - -static const float tansig_table[201] = { -0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f, -0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f, -0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f, -0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f, -0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f, -0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f, -0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f, -0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f, -0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f, -0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f, -0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f, -0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f, -0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f, -0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f, -0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f, -0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f, -0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f, -0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f, -0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f, -0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f, -0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f, -0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f, -0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f, -0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f, -0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f, -0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f, -0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f, -0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f, -0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f, -0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f, -0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f, -0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f, -0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f, -0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f, -0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f, -0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f, -0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f, -0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f, -1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, -1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, -1.000000f, -}; -- cgit v1.2.3